{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7fc99c3",
   "metadata": {},
   "source": [
    "# Use QR-DQN to Play Pong\n",
    "\n",
    "TensorFlow version"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2b23c9f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import copy\n",
    "import logging\n",
    "import itertools\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "np.random.seed(0)\n",
    "import pandas as pd\n",
    "import gym\n",
    "from gym.wrappers.atari_preprocessing import AtariPreprocessing\n",
    "from gym.wrappers.frame_stack import FrameStack\n",
    "import matplotlib.pyplot as plt\n",
    "import tensorflow.compat.v2 as tf\n",
    "tf.random.set_seed(0)\n",
    "from tensorflow import keras\n",
    "from tensorflow import nn\n",
    "from tensorflow import optimizers\n",
    "from tensorflow import losses\n",
    "from tensorflow.keras import layers\n",
    "from tensorflow.keras import models\n",
    "\n",
    "# Send log records at DEBUG level and above to stdout, each prefixed with\n",
    "# an HH:MM:SS timestamp and the record's level name.\n",
    "logging.basicConfig(level=logging.DEBUG,\n",
    "        format='%(asctime)s [%(levelname)s] %(message)s',\n",
    "        stream=sys.stdout, datefmt='%H:%M:%S')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "693bdf1e",
   "metadata": {},
   "source": [
    "Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "6ab1cb62",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:07:33 [INFO] env: <AtariPreprocessing<TimeLimit<AtariEnv<PongNoFrameskip-v4>>>>\n",
      "00:07:33 [INFO] action_space: Discrete(6)\n",
      "00:07:33 [INFO] observation_space: Box([[[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]\n",
      "\n",
      " [[0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  ...\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]\n",
      "  [0 0 0 ... 0 0 0]]], [[[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]\n",
      "\n",
      " [[255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  ...\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]\n",
      "  [255 255 255 ... 255 255 255]]], (4, 84, 84), uint8)\n",
      "00:07:33 [INFO] reward_range: (-inf, inf)\n",
      "00:07:33 [INFO] metadata: {'render.modes': ['human', 'rgb_array']}\n",
      "00:07:33 [INFO] num_stack: 4\n",
      "00:07:33 [INFO] lz4_compress: False\n",
      "00:07:33 [INFO] frames: deque([], maxlen=4)\n",
      "00:07:33 [INFO] id: PongNoFrameskip-v4\n",
      "00:07:33 [INFO] entry_point: gym.envs.atari:AtariEnv\n",
      "00:07:33 [INFO] reward_threshold: None\n",
      "00:07:33 [INFO] nondeterministic: False\n",
      "00:07:33 [INFO] max_episode_steps: 400000\n",
      "00:07:33 [INFO] _kwargs: {'game': 'pong', 'obs_type': 'image', 'frameskip': 1}\n",
      "00:07:33 [INFO] _env_name: PongNoFrameskip\n"
     ]
    }
   ],
   "source": [
    "# Pong with gym's Atari preprocessing, observed as 4 stacked frames.\n",
    "env = FrameStack(\n",
    "        AtariPreprocessing(gym.make('PongNoFrameskip-v4')), num_stack=4)\n",
    "env.env.env.unwrapped.np_random.seed(0) # set seed for noops\n",
    "env.env.env.unwrapped.unwrapped.seed(0) # set seed for AtariEnv\n",
    "# Log every instance attribute of the wrapped env, then of its spec.\n",
    "for described in (env, env.spec):\n",
    "    for key, value in vars(described).items():\n",
    "        logging.info('%s: %s', key, value)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "3747e919",
   "metadata": {},
   "source": [
    "Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5eb74be3",
   "metadata": {},
   "outputs": [],
   "source": [
    "class DQNReplayer:\n",
    "    def __init__(self, capacity):\n",
    "        self.memory = pd.DataFrame(index=range(capacity),\n",
    "                columns=['state', 'action', 'reward', 'next_state', 'done'])\n",
    "        self.i = 0\n",
    "        self.count = 0\n",
    "        self.capacity = capacity\n",
    "\n",
    "    def store(self, *args):\n",
    "        self.memory.loc[self.i] = args\n",
    "        self.i = (self.i + 1) % self.capacity\n",
    "        self.count = min(self.count + 1, self.capacity)\n",
    "\n",
    "    def sample(self, size):\n",
    "        indices = np.random.choice(self.count, size=size)\n",
    "        return (np.stack(self.memory.loc[indices, field]) for field in\n",
    "                self.memory.columns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a2b9a656",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Agent:\n",
    "    def __init__(self, env):\n",
    "        self.action_n = env.action_space.n\n",
    "        self.gamma = 0.99\n",
    "        self.epsilon = 1.\n",
    "\n",
    "        self.replayer = DQNReplayer(capacity=100000)\n",
    "\n",
    "        quantile_count = 64\n",
    "        self.cumprob_tensor = tf.range(1 / (2 * quantile_count),\n",
    "                1, 1 / quantile_count)[np.newaxis, :, np.newaxis]\n",
    "\n",
    "        self.evaluate_net = self.build_net(self.action_n, quantile_count)\n",
    "        self.target_net = models.clone_model(self.evaluate_net)\n",
    "\n",
    "    def build_net(self, action_n, quantile_count):\n",
    "        net = keras.Sequential([\n",
    "                keras.layers.Permute((2, 3, 1), input_shape=(4, 84, 84)),\n",
    "                layers.Conv2D(32, kernel_size=8, strides=4, activation=nn.relu),\n",
    "                layers.Conv2D(64, kernel_size=4, strides=2, activation=nn.relu),\n",
    "                layers.Conv2D(64, kernel_size=3, strides=1, activation=nn.relu),\n",
    "                layers.Flatten(),\n",
    "                layers.Dense(512, activation=nn.relu),\n",
    "                layers.Dense(action_n * quantile_count),\n",
    "                layers.Reshape((action_n, quantile_count))])\n",
    "        optimizer = optimizers.Adam(0.0001)\n",
    "        net.compile(optimizer=optimizer)\n",
    "        return net\n",
    "\n",
    "    def reset(self, mode=None):\n",
    "        self.mode = mode\n",
    "        if mode == 'train':\n",
    "            self.trajectory = []\n",
    "\n",
    "    def step(self, observation, reward, done):\n",
    "        state_tensor = tf.convert_to_tensor(np.array(observation)[np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        q_component_tensor = self.evaluate_net(state_tensor)\n",
    "        q_tensor = tf.reduce_mean(q_component_tensor, axis=2)\n",
    "        action_tensor = tf.math.argmax(q_tensor, axis=1)\n",
    "        actions = action_tensor.numpy()\n",
    "        action = actions[0]\n",
    "        if self.mode == 'train':\n",
    "            if np.random.rand() < self.epsilon:\n",
    "                action = np.random.randint(0, self.action_n)\n",
    "\n",
    "            self.trajectory += [observation, reward, done, action]\n",
    "            if len(self.trajectory) >= 8:\n",
    "                state, _, _, act, next_state, reward, done, _ = \\\n",
    "                        self.trajectory[-8:]\n",
    "                self.replayer.store(state, act, reward, next_state, done)\n",
    "            if self.replayer.count >= 1024 and self.replayer.count % 10 == 0:\n",
    "                self.learn()\n",
    "        return action\n",
    "\n",
    "    def close(self):\n",
    "        pass\n",
    "\n",
    "    def update_net(self, target_net, evaluate_net, learning_rate=0.005):\n",
    "        average_weights = [(1. - learning_rate) * t + learning_rate * e for t, e\n",
    "                in zip(target_net.get_weights(), evaluate_net.get_weights())]\n",
    "        target_net.set_weights(average_weights)\n",
    "\n",
    "    def learn(self):\n",
    "        # replay\n",
    "        batch_size = 32\n",
    "        states, actions, rewards, next_states, dones = \\\n",
    "                self.replayer.sample(batch_size)\n",
    "        state_tensor = tf.convert_to_tensor(states, dtype=tf.float32)\n",
    "        reward_tensor = tf.convert_to_tensor(rewards[:, np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        done_tensor = tf.convert_to_tensor(dones[:, np.newaxis],\n",
    "                dtype=tf.float32)\n",
    "        next_state_tensor = tf.convert_to_tensor(next_states, dtype=tf.float32)\n",
    "\n",
    "        # compute target\n",
    "        next_q_component_tensor = self.evaluate_net(next_state_tensor)\n",
    "        next_q_tensor = tf.reduce_mean(next_q_component_tensor, axis=2)\n",
    "        next_action_tensor = tf.math.argmax(next_q_tensor, axis=1)\n",
    "        next_actions = next_action_tensor.numpy()\n",
    "        all_next_q_quantile_tensor = self.target_net(next_state_tensor)\n",
    "        indices = [[idx, next_action] for idx, next_action in\n",
    "                enumerate(next_actions)]\n",
    "        next_q_quantile_tensor = tf.gather_nd(all_next_q_quantile_tensor,\n",
    "                indices)\n",
    "        target_quantile_tensor = reward_tensor + self.gamma \\\n",
    "                * next_q_quantile_tensor * (1. - done_tensor)\n",
    "\n",
    "        with tf.GradientTape() as tape:\n",
    "            all_q_quantile_tensor = self.evaluate_net(state_tensor)\n",
    "            indices = [[idx, action] for idx, action in enumerate(actions)]\n",
    "            q_quantile_tensor = tf.gather_nd(all_q_quantile_tensor, indices)\n",
    "\n",
    "            target_quantile_tensor = target_quantile_tensor[:, np.newaxis, :]\n",
    "            q_quantile_tensor = q_quantile_tensor[:, :, np.newaxis]\n",
    "            td_error_tensor = target_quantile_tensor - q_quantile_tensor\n",
    "            abs_td_error_tensor = tf.math.abs(td_error_tensor)\n",
    "            hubor_delta = 1.\n",
    "            hubor_loss_tensor = tf.where(abs_td_error_tensor < hubor_delta,\n",
    "                    0.5 * tf.square(td_error_tensor),\n",
    "                    hubor_delta * (abs_td_error_tensor - 0.5 * hubor_delta))\n",
    "            comparison_tensor = tf.cast(td_error_tensor < 0, dtype=tf.float32)\n",
    "            quantile_regression_tensor = tf.math.abs(self.cumprob_tensor -\n",
    "                    comparison_tensor)\n",
    "            quantile_huber_loss_tensor = tf.reduce_mean(tf.reduce_sum(\n",
    "                    hubor_loss_tensor * quantile_regression_tensor, axis=-1),\n",
    "                    axis=1)\n",
    "            loss_tensor = tf.reduce_mean(quantile_huber_loss_tensor)\n",
    "        grads = tape.gradient(loss_tensor, self.evaluate_net.variables)\n",
    "        self.evaluate_net.optimizer.apply_gradients(\n",
    "                zip(grads, self.evaluate_net.variables))\n",
    "\n",
    "        self.update_net(self.target_net, self.evaluate_net)\n",
    "\n",
    "        self.epsilon = max(self.epsilon - 1e-5, 0.05)\n",
    "\n",
    "\n",
    "# Instantiate the agent for the Pong environment created above.\n",
    "agent = Agent(env)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f02b76e5",
   "metadata": {},
   "source": [
    "Train & Test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "34ae8868",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "00:07:35 [INFO] ==== train ====\n",
      "00:07:58 [DEBUG] train episode 0: reward = -17.00, steps = 1265\n",
      "00:08:23 [DEBUG] train episode 1: reward = -21.00, steps = 762\n",
      "00:08:53 [DEBUG] train episode 2: reward = -20.00, steps = 960\n",
      "00:09:18 [DEBUG] train episode 3: reward = -21.00, steps = 791\n",
      "00:09:45 [DEBUG] train episode 4: reward = -21.00, steps = 875\n",
      "00:10:10 [DEBUG] train episode 5: reward = -21.00, steps = 820\n",
      "00:10:36 [DEBUG] train episode 6: reward = -21.00, steps = 878\n",
      "00:10:58 [DEBUG] train episode 7: reward = -19.00, steps = 934\n",
      "00:11:17 [DEBUG] train episode 8: reward = -20.00, steps = 837\n",
      "00:11:39 [DEBUG] train episode 9: reward = -20.00, steps = 956\n",
      "00:12:09 [DEBUG] train episode 10: reward = -21.00, steps = 987\n",
      "00:12:35 [DEBUG] train episode 11: reward = -20.00, steps = 861\n",
      "00:12:59 [DEBUG] train episode 12: reward = -21.00, steps = 758\n",
      "00:13:29 [DEBUG] train episode 13: reward = -19.00, steps = 956\n",
      "00:13:56 [DEBUG] train episode 14: reward = -21.00, steps = 846\n",
      "00:14:25 [DEBUG] train episode 15: reward = -21.00, steps = 968\n",
      "00:14:57 [DEBUG] train episode 16: reward = -19.00, steps = 1037\n",
      "00:15:21 [DEBUG] train episode 17: reward = -21.00, steps = 788\n",
      "00:15:52 [DEBUG] train episode 18: reward = -20.00, steps = 1007\n",
      "00:16:18 [DEBUG] train episode 19: reward = -21.00, steps = 843\n",
      "00:16:42 [DEBUG] train episode 20: reward = -21.00, steps = 789\n",
      "00:17:15 [DEBUG] train episode 21: reward = -21.00, steps = 1061\n",
      "00:17:40 [DEBUG] train episode 22: reward = -21.00, steps = 836\n",
      "00:18:04 [DEBUG] train episode 23: reward = -21.00, steps = 823\n",
      "00:18:34 [DEBUG] train episode 24: reward = -21.00, steps = 998\n",
      "00:19:01 [DEBUG] train episode 25: reward = -20.00, steps = 854\n",
      "00:19:26 [DEBUG] train episode 26: reward = -21.00, steps = 810\n",
      "00:19:52 [DEBUG] train episode 27: reward = -21.00, steps = 822\n",
      "00:20:17 [DEBUG] train episode 28: reward = -21.00, steps = 847\n",
      "00:20:43 [DEBUG] train episode 29: reward = -21.00, steps = 819\n",
      "00:21:09 [DEBUG] train episode 30: reward = -21.00, steps = 836\n",
      "00:21:41 [DEBUG] train episode 31: reward = -20.00, steps = 993\n",
      "00:22:08 [DEBUG] train episode 32: reward = -21.00, steps = 848\n",
      "00:22:37 [DEBUG] train episode 33: reward = -21.00, steps = 930\n",
      "00:23:07 [DEBUG] train episode 34: reward = -21.00, steps = 950\n",
      "00:23:43 [DEBUG] train episode 35: reward = -18.00, steps = 1161\n",
      "00:24:24 [DEBUG] train episode 36: reward = -19.00, steps = 1292\n",
      "00:24:57 [DEBUG] train episode 37: reward = -20.00, steps = 1066\n",
      "00:25:33 [DEBUG] train episode 38: reward = -19.00, steps = 1003\n",
      "00:26:03 [DEBUG] train episode 39: reward = -21.00, steps = 819\n",
      "00:26:32 [DEBUG] train episode 40: reward = -21.00, steps = 789\n",
      "00:27:03 [DEBUG] train episode 41: reward = -21.00, steps = 847\n",
      "00:27:36 [DEBUG] train episode 42: reward = -21.00, steps = 910\n",
      "00:28:07 [DEBUG] train episode 43: reward = -21.00, steps = 851\n",
      "00:28:42 [DEBUG] train episode 44: reward = -19.00, steps = 1036\n",
      "00:29:14 [DEBUG] train episode 45: reward = -20.00, steps = 930\n",
      "00:29:40 [DEBUG] train episode 46: reward = -21.00, steps = 851\n",
      "00:30:08 [DEBUG] train episode 47: reward = -20.00, steps = 921\n",
      "00:30:35 [DEBUG] train episode 48: reward = -20.00, steps = 924\n",
      "00:31:08 [DEBUG] train episode 49: reward = -18.00, steps = 1071\n",
      "00:31:38 [DEBUG] train episode 50: reward = -20.00, steps = 980\n",
      "00:32:10 [DEBUG] train episode 51: reward = -21.00, steps = 908\n",
      "00:32:36 [DEBUG] train episode 52: reward = -21.00, steps = 804\n",
      "00:33:06 [DEBUG] train episode 53: reward = -19.00, steps = 993\n",
      "00:33:36 [DEBUG] train episode 54: reward = -20.00, steps = 985\n",
      "00:34:03 [DEBUG] train episode 55: reward = -21.00, steps = 877\n",
      "00:34:32 [DEBUG] train episode 56: reward = -20.00, steps = 942\n",
      "00:34:59 [DEBUG] train episode 57: reward = -20.00, steps = 876\n",
      "00:35:30 [DEBUG] train episode 58: reward = -20.00, steps = 978\n",
      "00:36:03 [DEBUG] train episode 59: reward = -19.00, steps = 1033\n",
      "00:36:29 [DEBUG] train episode 60: reward = -21.00, steps = 818\n",
      "00:36:58 [DEBUG] train episode 61: reward = -20.00, steps = 898\n",
      "00:37:25 [DEBUG] train episode 62: reward = -21.00, steps = 761\n",
      "00:37:54 [DEBUG] train episode 63: reward = -21.00, steps = 898\n",
      "00:38:25 [DEBUG] train episode 64: reward = -21.00, steps = 974\n",
      "00:38:55 [DEBUG] train episode 65: reward = -21.00, steps = 914\n",
      "00:39:28 [DEBUG] train episode 66: reward = -21.00, steps = 1024\n",
      "00:40:00 [DEBUG] train episode 67: reward = -20.00, steps = 997\n",
      "00:40:30 [DEBUG] train episode 68: reward = -19.00, steps = 937\n",
      "00:40:58 [DEBUG] train episode 69: reward = -20.00, steps = 897\n",
      "00:41:27 [DEBUG] train episode 70: reward = -21.00, steps = 930\n",
      "00:41:53 [DEBUG] train episode 71: reward = -21.00, steps = 817\n",
      "00:42:25 [DEBUG] train episode 72: reward = -20.00, steps = 1012\n",
      "00:42:51 [DEBUG] train episode 73: reward = -21.00, steps = 846\n",
      "00:43:27 [DEBUG] train episode 74: reward = -20.00, steps = 1139\n",
      "00:43:59 [DEBUG] train episode 75: reward = -20.00, steps = 1021\n",
      "00:44:25 [DEBUG] train episode 76: reward = -21.00, steps = 825\n",
      "00:44:59 [DEBUG] train episode 77: reward = -21.00, steps = 1077\n",
      "00:45:27 [DEBUG] train episode 78: reward = -20.00, steps = 894\n",
      "00:46:00 [DEBUG] train episode 79: reward = -20.00, steps = 1050\n",
      "00:46:30 [DEBUG] train episode 80: reward = -21.00, steps = 912\n",
      "00:46:59 [DEBUG] train episode 81: reward = -21.00, steps = 943\n",
      "00:47:27 [DEBUG] train episode 82: reward = -21.00, steps = 874\n",
      "00:47:58 [DEBUG] train episode 83: reward = -21.00, steps = 941\n",
      "00:48:28 [DEBUG] train episode 84: reward = -19.00, steps = 950\n",
      "00:48:56 [DEBUG] train episode 85: reward = -21.00, steps = 880\n",
      "00:49:21 [DEBUG] train episode 86: reward = -21.00, steps = 791\n",
      "00:49:46 [DEBUG] train episode 87: reward = -21.00, steps = 776\n",
      "00:50:23 [DEBUG] train episode 88: reward = -18.00, steps = 1170\n",
      "00:50:56 [DEBUG] train episode 89: reward = -20.00, steps = 1024\n",
      "00:51:20 [DEBUG] train episode 90: reward = -21.00, steps = 760\n",
      "00:51:44 [DEBUG] train episode 91: reward = -21.00, steps = 776\n",
      "00:52:12 [DEBUG] train episode 92: reward = -21.00, steps = 848\n",
      "00:52:43 [DEBUG] train episode 93: reward = -21.00, steps = 896\n",
      "00:53:20 [DEBUG] train episode 94: reward = -19.00, steps = 1017\n",
      "00:53:53 [DEBUG] train episode 95: reward = -21.00, steps = 910\n",
      "00:54:25 [DEBUG] train episode 96: reward = -20.00, steps = 924\n",
      "00:55:03 [DEBUG] train episode 97: reward = -19.00, steps = 1146\n",
      "00:55:29 [DEBUG] train episode 98: reward = -21.00, steps = 789\n",
      "00:55:58 [DEBUG] train episode 99: reward = -21.00, steps = 883\n",
      "00:56:44 [DEBUG] train episode 100: reward = -19.00, steps = 1131\n",
      "00:57:33 [DEBUG] train episode 101: reward = -20.00, steps = 887\n",
      "00:58:03 [DEBUG] train episode 102: reward = -20.00, steps = 946\n",
      "00:58:31 [DEBUG] train episode 103: reward = -20.00, steps = 927\n",
      "00:59:04 [DEBUG] train episode 104: reward = -19.00, steps = 1102\n",
      "00:59:31 [DEBUG] train episode 105: reward = -21.00, steps = 883\n",
      "01:00:11 [DEBUG] train episode 106: reward = -19.00, steps = 1313\n",
      "01:01:11 [DEBUG] train episode 107: reward = -21.00, steps = 901\n",
      "01:03:30 [DEBUG] train episode 108: reward = -21.00, steps = 792\n",
      "01:06:36 [DEBUG] train episode 109: reward = -20.00, steps = 1057\n",
      "01:09:09 [DEBUG] train episode 110: reward = -20.00, steps = 879\n",
      "01:11:59 [DEBUG] train episode 111: reward = -21.00, steps = 968\n",
      "01:15:03 [DEBUG] train episode 112: reward = -20.00, steps = 1039\n",
      "01:17:47 [DEBUG] train episode 113: reward = -21.00, steps = 909\n",
      "01:20:25 [DEBUG] train episode 114: reward = -21.00, steps = 848\n",
      "01:23:18 [DEBUG] train episode 115: reward = -21.00, steps = 921\n",
      "01:26:25 [DEBUG] train episode 116: reward = -19.00, steps = 1008\n",
      "01:29:15 [DEBUG] train episode 117: reward = -21.00, steps = 910\n",
      "01:31:55 [DEBUG] train episode 118: reward = -20.00, steps = 866\n",
      "01:34:36 [DEBUG] train episode 119: reward = -20.00, steps = 872\n",
      "01:37:34 [DEBUG] train episode 120: reward = -20.00, steps = 964\n",
      "01:40:13 [DEBUG] train episode 121: reward = -20.00, steps = 865\n",
      "01:43:15 [DEBUG] train episode 122: reward = -20.00, steps = 986\n",
      "01:46:03 [DEBUG] train episode 123: reward = -21.00, steps = 911\n",
      "01:49:06 [DEBUG] train episode 124: reward = -20.00, steps = 991\n",
      "01:52:26 [DEBUG] train episode 125: reward = -18.00, steps = 1081\n",
      "01:55:21 [DEBUG] train episode 126: reward = -21.00, steps = 946\n",
      "01:58:25 [DEBUG] train episode 127: reward = -20.00, steps = 988\n",
      "02:01:20 [DEBUG] train episode 128: reward = -21.00, steps = 937\n",
      "02:04:43 [DEBUG] train episode 129: reward = -18.00, steps = 1100\n",
      "02:07:35 [DEBUG] train episode 130: reward = -21.00, steps = 930\n",
      "02:10:41 [DEBUG] train episode 131: reward = -19.00, steps = 1014\n",
      "02:14:10 [DEBUG] train episode 132: reward = -19.00, steps = 1135\n",
      "02:17:30 [DEBUG] train episode 133: reward = -20.00, steps = 1089\n",
      "02:21:36 [DEBUG] train episode 134: reward = -18.00, steps = 1327\n",
      "02:25:20 [DEBUG] train episode 135: reward = -19.00, steps = 1213\n",
      "02:28:57 [DEBUG] train episode 136: reward = -18.00, steps = 1178\n",
      "02:32:49 [DEBUG] train episode 137: reward = -19.00, steps = 1254\n",
      "02:36:55 [DEBUG] train episode 138: reward = -21.00, steps = 1333\n",
      "02:41:32 [DEBUG] train episode 139: reward = -18.00, steps = 1504\n",
      "02:45:41 [DEBUG] train episode 140: reward = -20.00, steps = 1350\n",
      "02:50:26 [DEBUG] train episode 141: reward = -15.00, steps = 1545\n",
      "02:55:57 [DEBUG] train episode 142: reward = -18.00, steps = 1799\n",
      "03:00:35 [DEBUG] train episode 143: reward = -17.00, steps = 1501\n",
      "03:05:41 [DEBUG] train episode 144: reward = -17.00, steps = 1670\n",
      "03:10:21 [DEBUG] train episode 145: reward = -19.00, steps = 1540\n",
      "03:17:03 [DEBUG] train episode 146: reward = -13.00, steps = 2200\n",
      "03:21:25 [DEBUG] train episode 147: reward = -20.00, steps = 1432\n",
      "03:26:09 [DEBUG] train episode 148: reward = -19.00, steps = 1560\n",
      "03:31:59 [DEBUG] train episode 149: reward = -16.00, steps = 1900\n",
      "03:38:04 [DEBUG] train episode 150: reward = -17.00, steps = 2011\n",
      "03:42:51 [DEBUG] train episode 151: reward = -20.00, steps = 1587\n",
      "03:49:19 [DEBUG] train episode 152: reward = -12.00, steps = 2145\n",
      "03:55:17 [DEBUG] train episode 153: reward = -14.00, steps = 1981\n",
      "04:01:02 [DEBUG] train episode 154: reward = -15.00, steps = 1898\n",
      "04:06:14 [DEBUG] train episode 155: reward = -18.00, steps = 1743\n",
      "04:11:30 [DEBUG] train episode 156: reward = -19.00, steps = 1774\n",
      "04:16:57 [DEBUG] train episode 157: reward = -18.00, steps = 1829\n",
      "04:22:51 [DEBUG] train episode 158: reward = -13.00, steps = 1980\n",
      "04:29:05 [DEBUG] train episode 159: reward = -18.00, steps = 2082\n",
      "04:33:53 [DEBUG] train episode 160: reward = -17.00, steps = 1597\n",
      "04:39:23 [DEBUG] train episode 161: reward = -16.00, steps = 1835\n",
      "04:46:41 [DEBUG] train episode 162: reward = -11.00, steps = 2420\n",
      "04:51:48 [DEBUG] train episode 163: reward = -16.00, steps = 1713\n",
      "04:55:37 [DEBUG] train episode 164: reward = -20.00, steps = 1278\n",
      "05:02:14 [DEBUG] train episode 165: reward = -10.00, steps = 2203\n",
      "05:07:22 [DEBUG] train episode 166: reward = -15.00, steps = 1713\n",
      "05:13:09 [DEBUG] train episode 167: reward = -16.00, steps = 1938\n",
      "05:18:04 [DEBUG] train episode 168: reward = -13.00, steps = 1643\n",
      "05:22:30 [DEBUG] train episode 169: reward = -16.00, steps = 1489\n",
      "05:29:01 [DEBUG] train episode 170: reward = -12.00, steps = 2193\n",
      "05:34:42 [DEBUG] train episode 171: reward = -11.00, steps = 1919\n",
      "05:38:47 [DEBUG] train episode 172: reward = -17.00, steps = 1377\n",
      "05:42:50 [DEBUG] train episode 173: reward = -15.00, steps = 1373\n",
      "05:48:17 [DEBUG] train episode 174: reward = -13.00, steps = 1846\n",
      "05:53:53 [DEBUG] train episode 175: reward = -15.00, steps = 1903\n",
      "06:00:27 [DEBUG] train episode 176: reward = -7.00, steps = 2216\n",
      "06:09:06 [DEBUG] train episode 177: reward = -5.00, steps = 2931\n",
      "06:16:54 [DEBUG] train episode 178: reward = -6.00, steps = 2658\n",
      "06:24:14 [DEBUG] train episode 179: reward = -5.00, steps = 2492\n",
      "06:33:02 [DEBUG] train episode 180: reward = -2.00, steps = 3016\n",
      "06:41:03 [DEBUG] train episode 181: reward = -1.00, steps = 2742\n",
      "06:46:34 [DEBUG] train episode 182: reward = -11.00, steps = 1888\n",
      "06:53:30 [DEBUG] train episode 183: reward = -9.00, steps = 2362\n",
      "07:00:59 [DEBUG] train episode 184: reward = -7.00, steps = 2553\n",
      "07:09:07 [DEBUG] train episode 185: reward = -3.00, steps = 2777\n",
      "07:15:35 [DEBUG] train episode 186: reward = -9.00, steps = 2204\n",
      "07:22:29 [DEBUG] train episode 187: reward = -6.00, steps = 2339\n",
      "07:26:50 [DEBUG] train episode 188: reward = -15.00, steps = 1477\n",
      "07:35:53 [DEBUG] train episode 189: reward = -4.00, steps = 3069\n",
      "07:44:09 [DEBUG] train episode 190: reward = -2.00, steps = 2819\n",
      "07:50:13 [DEBUG] train episode 191: reward = -8.00, steps = 2069\n",
      "07:56:51 [DEBUG] train episode 192: reward = -8.00, steps = 2237\n",
      "08:03:22 [DEBUG] train episode 193: reward = -10.00, steps = 2203\n",
      "08:08:04 [DEBUG] train episode 194: reward = -14.00, steps = 1604\n",
      "08:12:46 [DEBUG] train episode 195: reward = -13.00, steps = 1610\n",
      "08:15:36 [DEBUG] train episode 196: reward = -20.00, steps = 963\n",
      "08:21:59 [DEBUG] train episode 197: reward = -7.00, steps = 2175\n",
      "08:25:24 [DEBUG] train episode 198: reward = -17.00, steps = 1168\n",
      "08:32:23 [DEBUG] train episode 199: reward = -6.00, steps = 2397\n",
      "08:38:43 [DEBUG] train episode 200: reward = -6.00, steps = 2178\n",
      "08:43:19 [DEBUG] train episode 201: reward = -13.00, steps = 1563\n",
      "08:47:42 [DEBUG] train episode 202: reward = -17.00, steps = 1492\n",
      "08:54:06 [DEBUG] train episode 203: reward = -6.00, steps = 2193\n",
      "09:00:27 [DEBUG] train episode 204: reward = -6.00, steps = 2180\n",
      "09:05:35 [DEBUG] train episode 205: reward = -11.00, steps = 1756\n",
      "09:10:54 [DEBUG] train episode 206: reward = -11.00, steps = 1822\n",
      "09:15:36 [DEBUG] train episode 207: reward = -13.00, steps = 1608\n",
      "09:19:52 [DEBUG] train episode 208: reward = -15.00, steps = 1453\n",
      "09:26:13 [DEBUG] train episode 209: reward = -8.00, steps = 2151\n",
      "09:31:34 [DEBUG] train episode 210: reward = -11.00, steps = 1824\n",
      "09:37:11 [DEBUG] train episode 211: reward = -14.00, steps = 1654\n",
      "09:41:11 [DEBUG] train episode 212: reward = -17.00, steps = 1291\n",
      "09:45:14 [DEBUG] train episode 213: reward = -17.00, steps = 1351\n",
      "09:49:31 [DEBUG] train episode 214: reward = -15.00, steps = 1434\n",
      "09:55:01 [DEBUG] train episode 215: reward = -12.00, steps = 1711\n",
      "09:58:54 [DEBUG] train episode 216: reward = -17.00, steps = 1285\n",
      "10:05:21 [DEBUG] train episode 217: reward = -7.00, steps = 2134\n",
      "10:11:02 [DEBUG] train episode 218: reward = -10.00, steps = 1779\n",
      "10:14:51 [DEBUG] train episode 219: reward = -17.00, steps = 1171\n",
      "10:19:28 [DEBUG] train episode 220: reward = -14.00, steps = 1493\n",
      "10:24:33 [DEBUG] train episode 221: reward = -13.00, steps = 1599\n",
      "10:27:02 [DEBUG] train episode 222: reward = -21.00, steps = 809\n",
      "10:29:38 [DEBUG] train episode 223: reward = -20.00, steps = 835\n",
      "10:32:01 [DEBUG] train episode 224: reward = -21.00, steps = 787\n",
      "10:36:26 [DEBUG] train episode 225: reward = -14.00, steps = 1455\n",
      "10:41:36 [DEBUG] train episode 226: reward = -12.00, steps = 1714\n",
      "10:44:00 [DEBUG] train episode 227: reward = -21.00, steps = 792\n",
      "10:49:07 [DEBUG] train episode 228: reward = -11.00, steps = 1691\n",
      "10:54:47 [DEBUG] train episode 229: reward = -11.00, steps = 1886\n",
      "11:00:05 [DEBUG] train episode 230: reward = -13.00, steps = 1759\n",
      "11:05:19 [DEBUG] train episode 231: reward = -13.00, steps = 1733\n",
      "11:09:48 [DEBUG] train episode 232: reward = -16.00, steps = 1496\n",
      "11:15:01 [DEBUG] train episode 233: reward = -14.00, steps = 1743\n",
      "11:20:56 [DEBUG] train episode 234: reward = -9.00, steps = 1961\n",
      "11:26:54 [DEBUG] train episode 235: reward = -10.00, steps = 1983\n",
      "11:34:09 [DEBUG] train episode 236: reward = -7.00, steps = 2394\n",
      "11:41:16 [DEBUG] train episode 237: reward = -7.00, steps = 2354\n",
      "11:44:42 [DEBUG] train episode 238: reward = -17.00, steps = 1127\n",
      "11:48:52 [DEBUG] train episode 239: reward = -16.00, steps = 1370\n",
      "11:56:52 [DEBUG] train episode 240: reward = -3.00, steps = 2641\n",
      "12:00:18 [DEBUG] train episode 241: reward = -17.00, steps = 1126\n",
      "12:07:37 [DEBUG] train episode 242: reward = -6.00, steps = 2406\n",
      "12:10:55 [DEBUG] train episode 243: reward = -18.00, steps = 1090\n",
      "12:16:42 [DEBUG] train episode 244: reward = -10.00, steps = 1911\n",
      "12:20:42 [DEBUG] train episode 245: reward = -15.00, steps = 1316\n",
      "12:26:46 [DEBUG] train episode 246: reward = -11.00, steps = 2015\n",
      "12:32:22 [DEBUG] train episode 247: reward = -10.00, steps = 1855\n",
      "12:39:00 [DEBUG] train episode 248: reward = -5.00, steps = 2207\n",
      "12:45:50 [DEBUG] train episode 249: reward = -6.00, steps = 2274\n",
      "12:51:38 [DEBUG] train episode 250: reward = -11.00, steps = 1915\n",
      "12:57:47 [DEBUG] train episode 251: reward = -9.00, steps = 2030\n",
      "13:03:22 [DEBUG] train episode 252: reward = -11.00, steps = 1807\n",
      "13:09:16 [DEBUG] train episode 253: reward = -9.00, steps = 1955\n",
      "13:15:34 [DEBUG] train episode 254: reward = -8.00, steps = 2097\n",
      "13:20:16 [DEBUG] train episode 255: reward = -13.00, steps = 1566\n",
      "13:26:13 [DEBUG] train episode 256: reward = -9.00, steps = 1969\n",
      "13:30:53 [DEBUG] train episode 257: reward = -14.00, steps = 1551\n",
      "13:36:07 [DEBUG] train episode 258: reward = -11.00, steps = 1740\n",
      "13:42:27 [DEBUG] train episode 259: reward = -8.00, steps = 2108\n",
      "13:48:40 [DEBUG] train episode 260: reward = -11.00, steps = 1996\n",
      "13:55:42 [DEBUG] train episode 261: reward = -7.00, steps = 2143\n",
      "14:02:10 [DEBUG] train episode 262: reward = -7.00, steps = 2102\n",
      "14:09:00 [DEBUG] train episode 263: reward = -9.00, steps = 2103\n",
      "14:16:11 [DEBUG] train episode 264: reward = -6.00, steps = 2303\n",
      "14:20:51 [DEBUG] train episode 265: reward = -16.00, steps = 1492\n",
      "14:27:52 [DEBUG] train episode 266: reward = -7.00, steps = 2193\n",
      "14:34:51 [DEBUG] train episode 267: reward = -9.00, steps = 2133\n",
      "14:41:06 [DEBUG] train episode 268: reward = -11.00, steps = 1814\n",
      "14:47:18 [DEBUG] train episode 269: reward = -10.00, steps = 1980\n",
      "14:54:15 [DEBUG] train episode 270: reward = -8.00, steps = 2032\n",
      "15:00:26 [DEBUG] train episode 271: reward = -10.00, steps = 1871\n",
      "15:06:45 [DEBUG] train episode 272: reward = -10.00, steps = 1834\n",
      "15:12:23 [DEBUG] train episode 273: reward = -14.00, steps = 1697\n",
      "15:19:40 [DEBUG] train episode 274: reward = -6.00, steps = 2249\n",
      "15:25:27 [DEBUG] train episode 275: reward = -12.00, steps = 1796\n",
      "15:31:08 [DEBUG] train episode 276: reward = -12.00, steps = 1774\n",
      "15:36:40 [DEBUG] train episode 277: reward = -13.00, steps = 1668\n",
      "15:43:34 [DEBUG] train episode 278: reward = -9.00, steps = 2162\n",
      "15:49:43 [DEBUG] train episode 279: reward = -10.00, steps = 2027\n",
      "15:55:49 [DEBUG] train episode 280: reward = -9.00, steps = 2032\n",
      "16:01:52 [DEBUG] train episode 281: reward = -9.00, steps = 2004\n",
      "16:07:31 [DEBUG] train episode 282: reward = -11.00, steps = 1871\n",
      "16:13:22 [DEBUG] train episode 283: reward = -9.00, steps = 1908\n",
      "16:19:31 [DEBUG] train episode 284: reward = -9.00, steps = 1991\n",
      "16:24:58 [DEBUG] train episode 285: reward = -13.00, steps = 1652\n",
      "16:30:27 [DEBUG] train episode 286: reward = -13.00, steps = 1546\n",
      "16:37:57 [DEBUG] train episode 287: reward = -9.00, steps = 2125\n",
      "16:44:00 [DEBUG] train episode 288: reward = -13.00, steps = 1700\n",
      "16:49:20 [DEBUG] train episode 289: reward = -13.00, steps = 1613\n",
      "16:55:13 [DEBUG] train episode 290: reward = -10.00, steps = 1867\n",
      "17:01:05 [DEBUG] train episode 291: reward = -11.00, steps = 1853\n",
      "17:07:13 [DEBUG] train episode 292: reward = -10.00, steps = 1939\n",
      "17:12:58 [DEBUG] train episode 293: reward = -13.00, steps = 1792\n",
      "17:18:58 [DEBUG] train episode 294: reward = -11.00, steps = 1909\n",
      "17:23:56 [DEBUG] train episode 295: reward = -13.00, steps = 1595\n",
      "17:30:25 [DEBUG] train episode 296: reward = -9.00, steps = 1948\n",
      "17:37:32 [DEBUG] train episode 297: reward = -8.00, steps = 2081\n",
      "17:44:24 [DEBUG] train episode 298: reward = -8.00, steps = 2046\n",
      "17:51:39 [DEBUG] train episode 299: reward = -7.00, steps = 2133\n",
      "17:57:52 [DEBUG] train episode 300: reward = -10.00, steps = 1896\n",
      "18:04:26 [DEBUG] train episode 301: reward = -9.00, steps = 2034\n",
      "18:07:53 [DEBUG] train episode 302: reward = -17.00, steps = 1124\n",
      "18:13:35 [DEBUG] train episode 303: reward = -12.00, steps = 1834\n",
      "18:19:35 [DEBUG] train episode 304: reward = -10.00, steps = 1912\n",
      "18:28:13 [DEBUG] train episode 305: reward = 1.00, steps = 2804\n",
      "18:33:09 [DEBUG] train episode 306: reward = -13.00, steps = 1592\n",
      "18:38:42 [DEBUG] train episode 307: reward = -12.00, steps = 1793\n",
      "18:43:42 [DEBUG] train episode 308: reward = -13.00, steps = 1634\n",
      "18:48:17 [DEBUG] train episode 309: reward = -15.00, steps = 1486\n",
      "18:55:53 [DEBUG] train episode 310: reward = -3.00, steps = 2463\n",
      "19:00:48 [DEBUG] train episode 311: reward = -14.00, steps = 1597\n",
      "19:06:18 [DEBUG] train episode 312: reward = -13.00, steps = 1766\n",
      "19:10:58 [DEBUG] train episode 313: reward = -15.00, steps = 1436\n",
      "19:14:58 [DEBUG] train episode 314: reward = -16.00, steps = 1297\n",
      "19:20:28 [DEBUG] train episode 315: reward = -11.00, steps = 1756\n",
      "19:25:35 [DEBUG] train episode 316: reward = -13.00, steps = 1628\n",
      "19:29:16 [DEBUG] train episode 317: reward = -17.00, steps = 1188\n",
      "19:32:22 [DEBUG] train episode 318: reward = -19.00, steps = 982\n",
      "19:36:58 [DEBUG] train episode 319: reward = -15.00, steps = 1484\n",
      "19:41:47 [DEBUG] train episode 320: reward = -12.00, steps = 1565\n",
      "19:46:50 [DEBUG] train episode 321: reward = -13.00, steps = 1644\n",
      "19:52:26 [DEBUG] train episode 322: reward = -10.00, steps = 1826\n",
      "19:56:28 [DEBUG] train episode 323: reward = -17.00, steps = 1311\n",
      "20:00:29 [DEBUG] train episode 324: reward = -17.00, steps = 1315\n",
      "20:04:00 [DEBUG] train episode 325: reward = -18.00, steps = 1147\n",
      "20:07:43 [DEBUG] train episode 326: reward = -18.00, steps = 1212\n",
      "20:12:59 [DEBUG] train episode 327: reward = -13.00, steps = 1723\n",
      "20:16:37 [DEBUG] train episode 328: reward = -18.00, steps = 1186\n",
      "20:20:06 [DEBUG] train episode 329: reward = -17.00, steps = 1125\n",
      "20:23:26 [DEBUG] train episode 330: reward = -20.00, steps = 1075\n",
      "20:27:03 [DEBUG] train episode 331: reward = -17.00, steps = 1171\n",
      "20:30:14 [DEBUG] train episode 332: reward = -19.00, steps = 1025\n",
      "20:33:52 [DEBUG] train episode 333: reward = -17.00, steps = 1175\n",
      "20:38:54 [DEBUG] train episode 334: reward = -15.00, steps = 1631\n",
      "20:41:51 [DEBUG] train episode 335: reward = -20.00, steps = 956\n",
      "20:46:06 [DEBUG] train episode 336: reward = -15.00, steps = 1381\n",
      "20:49:57 [DEBUG] train episode 337: reward = -18.00, steps = 1250\n",
      "20:55:26 [DEBUG] train episode 338: reward = -12.00, steps = 1781\n",
      "20:59:49 [DEBUG] train episode 339: reward = -15.00, steps = 1421\n",
      "21:02:35 [DEBUG] train episode 340: reward = -20.00, steps = 892\n",
      "21:06:15 [DEBUG] train episode 341: reward = -18.00, steps = 1187\n",
      "21:10:29 [DEBUG] train episode 342: reward = -15.00, steps = 1379\n",
      "21:13:35 [DEBUG] train episode 343: reward = -19.00, steps = 1006\n",
      "21:17:03 [DEBUG] train episode 344: reward = -19.00, steps = 1021\n",
      "21:22:44 [DEBUG] train episode 345: reward = -12.00, steps = 1742\n",
      "21:27:05 [DEBUG] train episode 346: reward = -16.00, steps = 1266\n",
      "21:30:44 [DEBUG] train episode 347: reward = -18.00, steps = 1036\n",
      "21:36:15 [DEBUG] train episode 348: reward = -14.00, steps = 1557\n",
      "21:39:42 [DEBUG] train episode 349: reward = -20.00, steps = 1017\n",
      "21:44:38 [DEBUG] train episode 350: reward = -16.00, steps = 1599\n",
      "21:47:32 [DEBUG] train episode 351: reward = -19.00, steps = 933\n",
      "21:51:50 [DEBUG] train episode 352: reward = -18.00, steps = 1338\n",
      "21:56:14 [DEBUG] train episode 353: reward = -15.00, steps = 1389\n",
      "22:00:19 [DEBUG] train episode 354: reward = -18.00, steps = 1155\n",
      "22:04:17 [DEBUG] train episode 355: reward = -18.00, steps = 1150\n",
      "22:10:21 [DEBUG] train episode 356: reward = -12.00, steps = 1796\n",
      "22:14:03 [DEBUG] train episode 357: reward = -18.00, steps = 1105\n",
      "22:17:53 [DEBUG] train episode 358: reward = -17.00, steps = 1165\n",
      "22:21:08 [DEBUG] train episode 359: reward = -19.00, steps = 1002\n",
      "22:25:05 [DEBUG] train episode 360: reward = -19.00, steps = 1162\n",
      "22:28:04 [DEBUG] train episode 361: reward = -21.00, steps = 874\n",
      "22:33:35 [DEBUG] train episode 362: reward = -14.00, steps = 1636\n",
      "22:37:20 [DEBUG] train episode 363: reward = -18.00, steps = 1136\n",
      "22:40:32 [DEBUG] train episode 364: reward = -19.00, steps = 963\n",
      "22:46:32 [DEBUG] train episode 365: reward = -11.00, steps = 1792\n",
      "22:50:24 [DEBUG] train episode 366: reward = -18.00, steps = 1190\n",
      "22:54:17 [DEBUG] train episode 367: reward = -18.00, steps = 1195\n",
      "22:59:35 [DEBUG] train episode 368: reward = -16.00, steps = 1463\n",
      "23:03:35 [DEBUG] train episode 369: reward = -17.00, steps = 1281\n",
      "23:06:16 [DEBUG] train episode 370: reward = -21.00, steps = 857\n",
      "23:09:38 [DEBUG] train episode 371: reward = -18.00, steps = 1090\n",
      "23:12:55 [DEBUG] train episode 372: reward = -19.00, steps = 1061\n",
      "23:16:20 [DEBUG] train episode 373: reward = -19.00, steps = 1092\n",
      "23:20:07 [DEBUG] train episode 374: reward = -18.00, steps = 1223\n",
      "23:24:09 [DEBUG] train episode 375: reward = -16.00, steps = 1241\n",
      "23:29:20 [DEBUG] train episode 376: reward = -12.00, steps = 1661\n",
      "23:33:53 [DEBUG] train episode 377: reward = -17.00, steps = 1302\n",
      "23:37:17 [DEBUG] train episode 378: reward = -19.00, steps = 1008\n",
      "23:40:33 [DEBUG] train episode 379: reward = -20.00, steps = 1003\n",
      "23:44:40 [DEBUG] train episode 380: reward = -16.00, steps = 1268\n",
      "23:48:45 [DEBUG] train episode 381: reward = -18.00, steps = 1245\n",
      "23:54:02 [DEBUG] train episode 382: reward = -12.00, steps = 1631\n",
      "23:57:33 [DEBUG] train episode 383: reward = -19.00, steps = 1085\n",
      "00:01:39 [DEBUG] train episode 384: reward = -16.00, steps = 1303\n",
      "00:05:22 [DEBUG] train episode 385: reward = -19.00, steps = 1192\n",
      "00:12:35 [DEBUG] train episode 386: reward = -10.00, steps = 2303\n",
      "00:17:35 [DEBUG] train episode 387: reward = -13.00, steps = 1593\n",
      "00:20:52 [DEBUG] train episode 388: reward = -19.00, steps = 1018\n",
      "00:24:22 [DEBUG] train episode 389: reward = -19.00, steps = 1081\n",
      "00:27:45 [DEBUG] train episode 390: reward = -18.00, steps = 1073\n",
      "00:30:30 [DEBUG] train episode 391: reward = -20.00, steps = 888\n",
      "00:33:41 [DEBUG] train episode 392: reward = -19.00, steps = 1013\n",
      "00:37:14 [DEBUG] train episode 393: reward = -18.00, steps = 1146\n",
      "00:40:28 [DEBUG] train episode 394: reward = -21.00, steps = 948\n",
      "00:44:07 [DEBUG] train episode 395: reward = -19.00, steps = 1221\n",
      "00:47:26 [DEBUG] train episode 396: reward = -18.00, steps = 1116\n",
      "00:50:20 [DEBUG] train episode 397: reward = -20.00, steps = 979\n",
      "00:54:10 [DEBUG] train episode 398: reward = -16.00, steps = 1288\n",
      "00:57:59 [DEBUG] train episode 399: reward = -18.00, steps = 1290\n",
      "01:02:41 [DEBUG] train episode 400: reward = -14.00, steps = 1589\n",
      "01:06:22 [DEBUG] train episode 401: reward = -17.00, steps = 1245\n",
      "01:09:07 [DEBUG] train episode 402: reward = -19.00, steps = 933\n",
      "01:13:12 [DEBUG] train episode 403: reward = -18.00, steps = 1381\n",
      "01:16:24 [DEBUG] train episode 404: reward = -19.00, steps = 1085\n",
      "01:19:25 [DEBUG] train episode 405: reward = -19.00, steps = 1021\n",
      "01:23:28 [DEBUG] train episode 406: reward = -17.00, steps = 1359\n",
      "01:27:35 [DEBUG] train episode 407: reward = -17.00, steps = 1396\n",
      "01:31:16 [DEBUG] train episode 408: reward = -17.00, steps = 1243\n",
      "01:34:13 [DEBUG] train episode 409: reward = -18.00, steps = 997\n",
      "01:38:08 [DEBUG] train episode 410: reward = -17.00, steps = 1325\n",
      "01:41:38 [DEBUG] train episode 411: reward = -19.00, steps = 1180\n",
      "01:45:01 [DEBUG] train episode 412: reward = -18.00, steps = 1145\n",
      "01:48:15 [DEBUG] train episode 413: reward = -19.00, steps = 1092\n",
      "01:52:09 [DEBUG] train episode 414: reward = -17.00, steps = 1316\n",
      "01:57:53 [DEBUG] train episode 415: reward = -9.00, steps = 1941\n",
      "02:02:14 [DEBUG] train episode 416: reward = -16.00, steps = 1460\n",
      "02:07:49 [DEBUG] train episode 417: reward = -11.00, steps = 1880\n",
      "02:10:49 [DEBUG] train episode 418: reward = -19.00, steps = 1015\n",
      "02:13:57 [DEBUG] train episode 419: reward = -18.00, steps = 1056\n",
      "02:17:51 [DEBUG] train episode 420: reward = -15.00, steps = 1317\n",
      "02:21:18 [DEBUG] train episode 421: reward = -19.00, steps = 1153\n",
      "02:27:03 [DEBUG] train episode 422: reward = -11.00, steps = 1940\n",
      "02:31:25 [DEBUG] train episode 423: reward = -16.00, steps = 1474\n",
      "02:36:57 [DEBUG] train episode 424: reward = -11.00, steps = 1865\n",
      "02:41:37 [DEBUG] train episode 425: reward = -14.00, steps = 1572\n",
      "02:45:47 [DEBUG] train episode 426: reward = -16.00, steps = 1402\n",
      "02:50:15 [DEBUG] train episode 427: reward = -15.00, steps = 1506\n",
      "02:53:44 [DEBUG] train episode 428: reward = -19.00, steps = 1175\n",
      "02:59:28 [DEBUG] train episode 429: reward = -10.00, steps = 1937\n",
      "03:03:44 [DEBUG] train episode 430: reward = -16.00, steps = 1433\n",
      "03:08:20 [DEBUG] train episode 431: reward = -15.00, steps = 1552\n",
      "03:13:26 [DEBUG] train episode 432: reward = -12.00, steps = 1726\n",
      "03:20:19 [DEBUG] train episode 433: reward = -6.00, steps = 2326\n",
      "03:24:16 [DEBUG] train episode 434: reward = -17.00, steps = 1294\n",
      "03:28:11 [DEBUG] train episode 435: reward = -19.00, steps = 1249\n",
      "03:34:47 [DEBUG] train episode 436: reward = -8.00, steps = 2204\n",
      "03:38:38 [DEBUG] train episode 437: reward = -16.00, steps = 1294\n",
      "03:43:27 [DEBUG] train episode 438: reward = -13.00, steps = 1622\n",
      "03:48:01 [DEBUG] train episode 439: reward = -15.00, steps = 1532\n",
      "03:51:03 [DEBUG] train episode 440: reward = -20.00, steps = 1022\n",
      "03:55:55 [DEBUG] train episode 441: reward = -15.00, steps = 1635\n",
      "04:00:11 [DEBUG] train episode 442: reward = -14.00, steps = 1431\n",
      "04:05:28 [DEBUG] train episode 443: reward = -13.00, steps = 1763\n",
      "04:09:33 [DEBUG] train episode 444: reward = -16.00, steps = 1369\n",
      "04:17:35 [DEBUG] train episode 445: reward = -2.00, steps = 2687\n",
      "04:22:09 [DEBUG] train episode 446: reward = -14.00, steps = 1516\n",
      "04:30:30 [DEBUG] train episode 447: reward = -1.00, steps = 2780\n",
      "04:35:22 [DEBUG] train episode 448: reward = -12.00, steps = 1630\n",
      "04:41:33 [DEBUG] train episode 449: reward = 18.00, steps = 2079\n",
      "04:47:11 [DEBUG] train episode 450: reward = -12.00, steps = 1882\n",
      "04:51:37 [DEBUG] train episode 451: reward = -14.00, steps = 1485\n",
      "04:55:56 [DEBUG] train episode 452: reward = -15.00, steps = 1447\n",
      "04:59:48 [DEBUG] train episode 453: reward = -17.00, steps = 1291\n",
      "05:04:25 [DEBUG] train episode 454: reward = -13.00, steps = 1549\n",
      "05:08:52 [DEBUG] train episode 455: reward = -16.00, steps = 1485\n",
      "05:12:34 [DEBUG] train episode 456: reward = -17.00, steps = 1240\n",
      "05:19:18 [DEBUG] train episode 457: reward = -7.00, steps = 2269\n",
      "05:23:33 [DEBUG] train episode 458: reward = -16.00, steps = 1430\n",
      "05:30:28 [DEBUG] train episode 459: reward = -5.00, steps = 2336\n",
      "05:34:26 [DEBUG] train episode 460: reward = -15.00, steps = 1342\n",
      "05:37:39 [DEBUG] train episode 461: reward = -18.00, steps = 1088\n",
      "05:42:55 [DEBUG] train episode 462: reward = -11.00, steps = 1783\n",
      "05:48:11 [DEBUG] train episode 463: reward = -10.00, steps = 1789\n",
      "05:53:27 [DEBUG] train episode 464: reward = -13.00, steps = 1787\n",
      "05:57:38 [DEBUG] train episode 465: reward = -15.00, steps = 1413\n",
      "06:02:11 [DEBUG] train episode 466: reward = -16.00, steps = 1529\n",
      "06:06:59 [DEBUG] train episode 467: reward = -15.00, steps = 1622\n",
      "06:09:41 [DEBUG] train episode 468: reward = -21.00, steps = 911\n",
      "06:15:26 [DEBUG] train episode 469: reward = -11.00, steps = 1939\n",
      "06:20:06 [DEBUG] train episode 470: reward = -13.00, steps = 1578\n",
      "06:25:12 [DEBUG] train episode 471: reward = -14.00, steps = 1713\n",
      "06:29:09 [DEBUG] train episode 472: reward = -16.00, steps = 1328\n",
      "06:33:44 [DEBUG] train episode 473: reward = -15.00, steps = 1544\n",
      "06:38:10 [DEBUG] train episode 474: reward = -15.00, steps = 1492\n",
      "06:41:30 [DEBUG] train episode 475: reward = -19.00, steps = 1116\n",
      "06:48:04 [DEBUG] train episode 476: reward = -10.00, steps = 2215\n",
      "06:55:10 [DEBUG] train episode 477: reward = -4.00, steps = 2398\n",
      "06:59:46 [DEBUG] train episode 478: reward = -14.00, steps = 1553\n",
      "07:05:37 [DEBUG] train episode 479: reward = -11.00, steps = 1977\n",
      "07:09:43 [DEBUG] train episode 480: reward = -15.00, steps = 1382\n",
      "07:15:03 [DEBUG] train episode 481: reward = -10.00, steps = 1802\n",
      "07:22:26 [DEBUG] train episode 482: reward = -2.00, steps = 2487\n",
      "07:27:48 [DEBUG] train episode 483: reward = -10.00, steps = 1809\n",
      "07:31:48 [DEBUG] train episode 484: reward = -16.00, steps = 1344\n",
      "07:36:13 [DEBUG] train episode 485: reward = -14.00, steps = 1488\n",
      "07:42:30 [DEBUG] train episode 486: reward = -7.00, steps = 2126\n",
      "07:46:51 [DEBUG] train episode 487: reward = -15.00, steps = 1470\n",
      "07:53:11 [DEBUG] train episode 488: reward = -7.00, steps = 2141\n",
      "08:00:01 [DEBUG] train episode 489: reward = -6.00, steps = 2310\n",
      "08:07:52 [DEBUG] train episode 490: reward = -2.00, steps = 2654\n",
      "08:15:49 [DEBUG] train episode 491: reward = 1.00, steps = 2682\n",
      "08:23:37 [DEBUG] train episode 492: reward = 1.00, steps = 2627\n",
      "08:29:18 [DEBUG] train episode 493: reward = 18.00, steps = 1911\n",
      "08:33:49 [DEBUG] train episode 494: reward = -15.00, steps = 1535\n",
      "08:39:04 [DEBUG] train episode 495: reward = -12.00, steps = 1787\n",
      "08:45:49 [DEBUG] train episode 496: reward = -6.00, steps = 2281\n",
      "08:50:17 [DEBUG] train episode 497: reward = -15.00, steps = 1519\n",
      "08:55:19 [DEBUG] train episode 498: reward = -13.00, steps = 1708\n",
      "09:00:44 [DEBUG] train episode 499: reward = -11.00, steps = 1840\n",
      "09:07:37 [DEBUG] train episode 500: reward = 11.00, steps = 2329\n",
      "09:12:29 [DEBUG] train episode 501: reward = -14.00, steps = 1651\n",
      "09:18:32 [DEBUG] train episode 502: reward = -8.00, steps = 2043\n",
      "09:23:41 [DEBUG] train episode 503: reward = -11.00, steps = 1728\n",
      "09:28:22 [DEBUG] train episode 504: reward = -14.00, steps = 1582\n",
      "09:35:58 [DEBUG] train episode 505: reward = -3.00, steps = 2564\n",
      "09:40:39 [DEBUG] train episode 506: reward = -14.00, steps = 1584\n",
      "09:45:30 [DEBUG] train episode 507: reward = -13.00, steps = 1638\n",
      "09:52:55 [DEBUG] train episode 508: reward = -5.00, steps = 2506\n",
      "09:59:37 [DEBUG] train episode 509: reward = -8.00, steps = 2169\n",
      "10:06:24 [DEBUG] train episode 510: reward = -9.00, steps = 1944\n",
      "10:12:57 [DEBUG] train episode 511: reward = -10.00, steps = 1978\n",
      "10:22:17 [DEBUG] train episode 512: reward = 1.00, steps = 2760\n",
      "10:30:18 [DEBUG] train episode 513: reward = 10.00, steps = 2504\n",
      "10:38:05 [DEBUG] train episode 514: reward = -8.00, steps = 2250\n",
      "10:45:49 [DEBUG] train episode 515: reward = -6.00, steps = 2313\n",
      "10:55:09 [DEBUG] train episode 516: reward = -3.00, steps = 2792\n",
      "10:59:28 [DEBUG] train episode 517: reward = -16.00, steps = 1290\n",
      "11:06:54 [DEBUG] train episode 518: reward = 17.00, steps = 2152\n",
      "11:13:32 [DEBUG] train episode 519: reward = 17.00, steps = 2093\n",
      "11:21:27 [DEBUG] train episode 520: reward = 7.00, steps = 2527\n",
      "11:28:49 [DEBUG] train episode 521: reward = -6.00, steps = 2378\n",
      "11:36:44 [DEBUG] train episode 522: reward = -2.00, steps = 2488\n",
      "11:45:27 [DEBUG] train episode 523: reward = 1.00, steps = 2758\n",
      "11:52:48 [DEBUG] train episode 524: reward = -9.00, steps = 2275\n",
      "12:00:07 [DEBUG] train episode 525: reward = 14.00, steps = 2248\n",
      "12:09:28 [DEBUG] train episode 526: reward = -1.00, steps = 2757\n",
      "12:14:47 [DEBUG] train episode 527: reward = -13.00, steps = 1727\n",
      "12:23:03 [DEBUG] train episode 528: reward = -1.00, steps = 2677\n",
      "12:31:26 [DEBUG] train episode 529: reward = -2.00, steps = 2721\n",
      "12:39:57 [DEBUG] train episode 530: reward = -1.00, steps = 2757\n",
      "12:47:30 [DEBUG] train episode 531: reward = -4.00, steps = 2427\n",
      "12:55:43 [DEBUG] train episode 532: reward = -2.00, steps = 2671\n",
      "13:03:05 [DEBUG] train episode 533: reward = 8.00, steps = 2388\n",
      "13:10:04 [DEBUG] train episode 534: reward = -7.00, steps = 2260\n",
      "13:17:39 [DEBUG] train episode 535: reward = -3.00, steps = 2460\n",
      "13:24:37 [DEBUG] train episode 536: reward = -8.00, steps = 2254\n",
      "13:32:55 [DEBUG] train episode 537: reward = -4.00, steps = 2611\n",
      "13:39:12 [DEBUG] train episode 538: reward = -11.00, steps = 2009\n",
      "13:46:50 [DEBUG] train episode 539: reward = 8.00, steps = 2471\n",
      "13:51:46 [DEBUG] train episode 540: reward = -15.00, steps = 1592\n",
      "14:00:20 [DEBUG] train episode 541: reward = -1.00, steps = 2765\n",
      "14:08:48 [DEBUG] train episode 542: reward = -3.00, steps = 2578\n",
      "14:15:22 [DEBUG] train episode 543: reward = -11.00, steps = 1823\n",
      "14:23:40 [DEBUG] train episode 544: reward = -7.00, steps = 2294\n",
      "14:32:16 [DEBUG] train episode 545: reward = 8.00, steps = 2377\n",
      "14:38:45 [DEBUG] train episode 546: reward = -11.00, steps = 1792\n",
      "14:47:21 [DEBUG] train episode 547: reward = -5.00, steps = 2376\n",
      "14:56:11 [DEBUG] train episode 548: reward = -5.00, steps = 2568\n",
      "15:04:34 [DEBUG] train episode 549: reward = 1.00, steps = 2727\n",
      "15:12:56 [DEBUG] train episode 550: reward = -1.00, steps = 2730\n",
      "15:20:28 [DEBUG] train episode 551: reward = -6.00, steps = 2466\n",
      "15:24:39 [DEBUG] train episode 552: reward = -15.00, steps = 1360\n",
      "15:31:20 [DEBUG] train episode 553: reward = 14.00, steps = 2180\n",
      "15:36:59 [DEBUG] train episode 554: reward = 18.00, steps = 1843\n",
      "15:44:02 [DEBUG] train episode 555: reward = -5.00, steps = 2299\n",
      "15:52:52 [DEBUG] train episode 556: reward = -1.00, steps = 2882\n",
      "16:00:51 [DEBUG] train episode 557: reward = 8.00, steps = 2607\n",
      "16:08:27 [DEBUG] train episode 558: reward = -4.00, steps = 2483\n",
      "16:16:20 [DEBUG] train episode 559: reward = 8.00, steps = 2573\n",
      "16:25:10 [DEBUG] train episode 560: reward = -1.00, steps = 2884\n",
      "16:28:56 [DEBUG] train episode 561: reward = -16.00, steps = 1231\n",
      "16:36:40 [DEBUG] train episode 562: reward = -3.00, steps = 2527\n",
      "16:42:05 [DEBUG] train episode 563: reward = 17.00, steps = 1763\n",
      "16:49:49 [DEBUG] train episode 564: reward = 8.00, steps = 2527\n",
      "16:57:14 [DEBUG] train episode 565: reward = -1.00, steps = 2423\n",
      "17:03:05 [DEBUG] train episode 566: reward = -11.00, steps = 1838\n",
      "17:08:00 [DEBUG] train episode 567: reward = -12.00, steps = 1528\n",
      "17:15:21 [DEBUG] train episode 568: reward = 8.00, steps = 2234\n",
      "17:22:48 [DEBUG] train episode 569: reward = 12.00, steps = 2143\n",
      "17:31:15 [DEBUG] train episode 570: reward = -7.00, steps = 2439\n",
      "17:39:55 [DEBUG] train episode 571: reward = 11.00, steps = 2374\n",
      "17:48:56 [DEBUG] train episode 572: reward = -1.00, steps = 2686\n",
      "17:59:19 [DEBUG] train episode 573: reward = 1.00, steps = 3200\n",
      "18:05:21 [DEBUG] train episode 574: reward = 17.00, steps = 1876\n",
      "18:13:11 [DEBUG] train episode 575: reward = -4.00, steps = 2465\n",
      "18:23:13 [DEBUG] train episode 576: reward = -1.00, steps = 2838\n",
      "18:30:54 [DEBUG] train episode 577: reward = 10.00, steps = 2350\n",
      "18:39:08 [DEBUG] train episode 578: reward = 8.00, steps = 2473\n",
      "18:46:37 [DEBUG] train episode 579: reward = 7.00, steps = 2325\n",
      "18:51:39 [DEBUG] train episode 580: reward = -14.00, steps = 1534\n",
      "18:59:54 [DEBUG] train episode 581: reward = -6.00, steps = 2533\n",
      "19:07:54 [DEBUG] train episode 582: reward = 7.00, steps = 2477\n",
      "19:15:25 [DEBUG] train episode 583: reward = 6.00, steps = 2414\n",
      "19:23:12 [DEBUG] train episode 584: reward = -6.00, steps = 2495\n",
      "19:32:15 [DEBUG] train episode 585: reward = 5.00, steps = 2784\n",
      "19:39:50 [DEBUG] train episode 586: reward = 11.00, steps = 2363\n",
      "19:45:31 [DEBUG] train episode 587: reward = 17.00, steps = 1812\n",
      "19:53:45 [DEBUG] train episode 588: reward = 8.00, steps = 2652\n",
      "20:02:42 [DEBUG] train episode 589: reward = 1.00, steps = 2800\n",
      "20:10:19 [DEBUG] train episode 590: reward = 10.00, steps = 2427\n",
      "20:18:16 [DEBUG] train episode 591: reward = -3.00, steps = 2511\n",
      "20:26:21 [DEBUG] train episode 592: reward = 1.00, steps = 2586\n",
      "20:32:41 [DEBUG] train episode 593: reward = 13.00, steps = 2058\n",
      "20:41:02 [DEBUG] train episode 594: reward = -1.00, steps = 2724\n",
      "20:49:09 [DEBUG] train episode 595: reward = 7.00, steps = 2621\n",
      "20:56:51 [DEBUG] train episode 596: reward = 14.00, steps = 2301\n",
      "21:05:25 [DEBUG] train episode 597: reward = -4.00, steps = 2480\n",
      "21:10:57 [DEBUG] train episode 598: reward = -15.00, steps = 1628\n",
      "21:19:44 [DEBUG] train episode 599: reward = 8.00, steps = 2539\n",
      "21:27:37 [DEBUG] train episode 600: reward = 11.00, steps = 2313\n",
      "21:36:49 [DEBUG] train episode 601: reward = -3.00, steps = 2760\n",
      "21:45:08 [DEBUG] train episode 602: reward = 6.00, steps = 2474\n",
      "21:53:14 [DEBUG] train episode 603: reward = 6.00, steps = 2273\n",
      "22:00:53 [DEBUG] train episode 604: reward = 8.00, steps = 2226\n",
      "22:08:40 [DEBUG] train episode 605: reward = -8.00, steps = 2374\n",
      "22:16:45 [DEBUG] train episode 606: reward = -5.00, steps = 2421\n",
      "22:25:12 [DEBUG] train episode 607: reward = -6.00, steps = 2521\n",
      "22:31:09 [DEBUG] train episode 608: reward = 17.00, steps = 1773\n",
      "22:36:15 [DEBUG] train episode 609: reward = -14.00, steps = 1498\n",
      "22:44:52 [DEBUG] train episode 610: reward = -1.00, steps = 2528\n",
      "22:49:29 [DEBUG] train episode 611: reward = -15.00, steps = 1424\n",
      "22:56:06 [DEBUG] train episode 612: reward = -11.00, steps = 2089\n",
      "23:02:52 [DEBUG] train episode 613: reward = 16.00, steps = 2183\n",
      "23:06:29 [DEBUG] train episode 614: reward = -17.00, steps = 1171\n",
      "23:13:50 [DEBUG] train episode 615: reward = 8.00, steps = 2257\n",
      "23:19:53 [DEBUG] train episode 616: reward = 14.00, steps = 1903\n",
      "23:27:12 [DEBUG] train episode 617: reward = 6.00, steps = 2326\n",
      "23:32:37 [DEBUG] train episode 618: reward = 18.00, steps = 1747\n",
      "23:40:15 [DEBUG] train episode 619: reward = 6.00, steps = 2455\n",
      "23:48:24 [DEBUG] train episode 620: reward = 7.00, steps = 2547\n",
      "23:53:55 [DEBUG] train episode 621: reward = -13.00, steps = 1652\n",
      "23:58:24 [DEBUG] train episode 622: reward = -17.00, steps = 1238\n",
      "00:07:52 [DEBUG] train episode 623: reward = 1.00, steps = 2843\n",
      "00:16:37 [DEBUG] train episode 624: reward = -1.00, steps = 2699\n",
      "00:25:56 [DEBUG] train episode 625: reward = 6.00, steps = 2892\n",
      "00:32:47 [DEBUG] train episode 626: reward = 11.00, steps = 2216\n",
      "00:39:21 [DEBUG] train episode 627: reward = -7.00, steps = 1963\n",
      "00:44:56 [DEBUG] train episode 628: reward = 14.00, steps = 1867\n",
      "00:51:53 [DEBUG] train episode 629: reward = -8.00, steps = 2339\n",
      "00:58:17 [DEBUG] train episode 630: reward = 12.00, steps = 2167\n",
      "01:03:56 [DEBUG] train episode 631: reward = -11.00, steps = 1909\n",
      "01:08:34 [DEBUG] train episode 632: reward = 20.00, steps = 1563\n",
      "01:14:44 [DEBUG] train episode 633: reward = 12.00, steps = 2093\n",
      "01:22:35 [DEBUG] train episode 634: reward = 6.00, steps = 2639\n",
      "01:30:04 [DEBUG] train episode 635: reward = 8.00, steps = 2522\n",
      "01:35:40 [DEBUG] train episode 636: reward = -9.00, steps = 1879\n",
      "01:40:29 [DEBUG] train episode 637: reward = -13.00, steps = 1626\n",
      "01:47:03 [DEBUG] train episode 638: reward = -5.00, steps = 2207\n",
      "01:55:04 [DEBUG] train episode 639: reward = -2.00, steps = 2697\n",
      "02:03:20 [DEBUG] train episode 640: reward = 1.00, steps = 2767\n",
      "02:09:25 [DEBUG] train episode 641: reward = -7.00, steps = 2049\n",
      "02:17:25 [DEBUG] train episode 642: reward = -2.00, steps = 2686\n",
      "02:23:29 [DEBUG] train episode 643: reward = 11.00, steps = 2027\n",
      "02:27:02 [DEBUG] train episode 644: reward = -17.00, steps = 1193\n",
      "02:35:15 [DEBUG] train episode 645: reward = 1.00, steps = 2758\n",
      "02:40:44 [DEBUG] train episode 646: reward = 14.00, steps = 1839\n",
      "02:46:11 [DEBUG] train episode 647: reward = -10.00, steps = 1832\n",
      "02:52:45 [DEBUG] train episode 648: reward = -8.00, steps = 2211\n",
      "02:59:06 [DEBUG] train episode 649: reward = 8.00, steps = 2135\n",
      "03:05:54 [DEBUG] train episode 650: reward = 8.00, steps = 2284\n",
      "03:14:39 [DEBUG] train episode 651: reward = 1.00, steps = 2944\n",
      "03:18:54 [DEBUG] train episode 652: reward = -14.00, steps = 1429\n",
      "03:25:10 [DEBUG] train episode 653: reward = 14.00, steps = 2025\n",
      "03:32:18 [DEBUG] train episode 654: reward = -7.00, steps = 2377\n",
      "03:37:23 [DEBUG] train episode 655: reward = -10.00, steps = 1696\n",
      "03:44:24 [DEBUG] train episode 656: reward = 6.00, steps = 2338\n",
      "03:51:13 [DEBUG] train episode 657: reward = -7.00, steps = 2281\n",
      "03:58:31 [DEBUG] train episode 658: reward = 1.00, steps = 2456\n",
      "04:04:27 [DEBUG] train episode 659: reward = 11.00, steps = 1988\n",
      "04:12:02 [DEBUG] train episode 660: reward = 1.00, steps = 2549\n",
      "04:20:10 [DEBUG] train episode 661: reward = -1.00, steps = 2713\n",
      "04:28:05 [DEBUG] train episode 662: reward = -3.00, steps = 2620\n",
      "04:32:36 [DEBUG] train episode 663: reward = -14.00, steps = 1513\n",
      "04:37:55 [DEBUG] train episode 664: reward = -10.00, steps = 1789\n",
      "04:44:00 [DEBUG] train episode 665: reward = -9.00, steps = 2040\n",
      "04:48:57 [DEBUG] train episode 666: reward = -14.00, steps = 1667\n",
      "04:56:27 [DEBUG] train episode 667: reward = 1.00, steps = 2525\n",
      "05:01:22 [DEBUG] train episode 668: reward = -14.00, steps = 1658\n",
      "05:05:26 [DEBUG] train episode 669: reward = -16.00, steps = 1362\n",
      "05:13:02 [DEBUG] train episode 670: reward = 4.00, steps = 2554\n",
      "05:19:20 [DEBUG] train episode 671: reward = 8.00, steps = 2117\n",
      "05:26:19 [DEBUG] train episode 672: reward = -5.00, steps = 2329\n",
      "05:33:37 [DEBUG] train episode 673: reward = -6.00, steps = 2444\n",
      "05:40:36 [DEBUG] train episode 674: reward = 6.00, steps = 2352\n",
      "05:47:46 [DEBUG] train episode 675: reward = 1.00, steps = 2414\n",
      "05:52:50 [DEBUG] train episode 676: reward = -12.00, steps = 1707\n",
      "05:58:12 [DEBUG] train episode 677: reward = -12.00, steps = 1802\n",
      "06:05:46 [DEBUG] train episode 678: reward = -1.00, steps = 2538\n",
      "06:11:02 [DEBUG] train episode 679: reward = -10.00, steps = 1779\n",
      "06:16:27 [DEBUG] train episode 680: reward = 14.00, steps = 1821\n",
      "06:25:02 [DEBUG] train episode 681: reward = -1.00, steps = 2880\n",
      "06:32:42 [DEBUG] train episode 682: reward = 1.00, steps = 2572\n",
      "06:40:11 [DEBUG] train episode 683: reward = -1.00, steps = 2511\n",
      "06:46:49 [DEBUG] train episode 684: reward = 8.00, steps = 2227\n",
      "06:52:27 [DEBUG] train episode 685: reward = -11.00, steps = 1889\n",
      "06:59:15 [DEBUG] train episode 686: reward = -2.00, steps = 2283\n",
      "07:06:02 [DEBUG] train episode 687: reward = 8.00, steps = 2274\n",
      "07:14:14 [DEBUG] train episode 688: reward = 1.00, steps = 2740\n",
      "07:20:54 [DEBUG] train episode 689: reward = -7.00, steps = 2234\n",
      "07:27:11 [DEBUG] train episode 690: reward = 12.00, steps = 2093\n",
      "07:34:45 [DEBUG] train episode 691: reward = -6.00, steps = 2520\n",
      "07:40:05 [DEBUG] train episode 692: reward = 16.00, steps = 1748\n",
      "07:45:37 [DEBUG] train episode 693: reward = -10.00, steps = 1852\n",
      "07:51:14 [DEBUG] train episode 694: reward = -9.00, steps = 1884\n",
      "07:58:45 [DEBUG] train episode 695: reward = -1.00, steps = 2524\n",
      "08:06:30 [DEBUG] train episode 696: reward = -1.00, steps = 2598\n",
      "08:13:03 [DEBUG] train episode 697: reward = 7.00, steps = 2204\n",
      "08:18:24 [DEBUG] train episode 698: reward = 18.00, steps = 1797\n",
      "08:24:27 [DEBUG] train episode 699: reward = -5.00, steps = 2364\n",
      "08:31:22 [DEBUG] train episode 700: reward = -8.00, steps = 2326\n",
      "08:36:07 [DEBUG] train episode 701: reward = -13.00, steps = 1605\n",
      "08:42:48 [DEBUG] train episode 702: reward = 9.00, steps = 2253\n",
      "08:50:06 [DEBUG] train episode 703: reward = 8.00, steps = 2454\n",
      "08:56:21 [DEBUG] train episode 704: reward = 14.00, steps = 2107\n",
      "09:01:01 [DEBUG] train episode 705: reward = -13.00, steps = 1565\n",
      "09:09:00 [DEBUG] train episode 706: reward = -1.00, steps = 2685\n",
      "09:14:48 [DEBUG] train episode 707: reward = -12.00, steps = 1944\n",
      "09:21:45 [DEBUG] train episode 708: reward = 10.00, steps = 2215\n",
      "09:28:13 [DEBUG] train episode 709: reward = 13.00, steps = 2175\n",
      "09:37:25 [DEBUG] train episode 710: reward = 1.00, steps = 2709\n",
      "09:42:30 [DEBUG] train episode 711: reward = 19.00, steps = 1618\n",
      "09:48:42 [DEBUG] train episode 712: reward = -8.00, steps = 1967\n",
      "09:56:05 [DEBUG] train episode 713: reward = 14.00, steps = 2222\n",
      "10:04:01 [DEBUG] train episode 714: reward = 12.00, steps = 2351\n",
      "10:10:31 [DEBUG] train episode 715: reward = 17.00, steps = 1918\n",
      "10:17:43 [DEBUG] train episode 716: reward = -6.00, steps = 2192\n",
      "10:25:25 [DEBUG] train episode 717: reward = 13.00, steps = 2382\n",
      "10:33:31 [DEBUG] train episode 718: reward = 8.00, steps = 2420\n",
      "10:40:26 [DEBUG] train episode 719: reward = 14.00, steps = 2086\n",
      "10:49:49 [DEBUG] train episode 720: reward = 1.00, steps = 2874\n",
      "10:57:52 [DEBUG] train episode 721: reward = 1.00, steps = 2482\n",
      "11:05:15 [DEBUG] train episode 722: reward = 8.00, steps = 2283\n",
      "11:12:53 [DEBUG] train episode 723: reward = 6.00, steps = 2358\n",
      "11:20:00 [DEBUG] train episode 724: reward = 10.00, steps = 2213\n",
      "11:28:04 [DEBUG] train episode 725: reward = -4.00, steps = 2523\n",
      "11:34:34 [DEBUG] train episode 726: reward = -9.00, steps = 2071\n",
      "11:41:19 [DEBUG] train episode 727: reward = -9.00, steps = 2176\n",
      "11:45:43 [DEBUG] train episode 728: reward = -16.00, steps = 1415\n",
      "11:54:27 [DEBUG] train episode 729: reward = -1.00, steps = 2766\n",
      "12:01:44 [DEBUG] train episode 730: reward = -6.00, steps = 2331\n",
      "12:05:25 [DEBUG] train episode 731: reward = -16.00, steps = 1174\n",
      "12:09:52 [DEBUG] train episode 732: reward = -16.00, steps = 1432\n",
      "12:15:08 [DEBUG] train episode 733: reward = 18.00, steps = 1700\n",
      "12:22:12 [DEBUG] train episode 734: reward = 9.00, steps = 2273\n",
      "12:27:20 [DEBUG] train episode 735: reward = -14.00, steps = 1649\n",
      "12:31:52 [DEBUG] train episode 736: reward = -16.00, steps = 1464\n",
      "12:36:30 [DEBUG] train episode 737: reward = -14.00, steps = 1495\n",
      "12:40:34 [DEBUG] train episode 738: reward = -16.00, steps = 1311\n",
      "12:44:51 [DEBUG] train episode 739: reward = -15.00, steps = 1377\n",
      "12:49:25 [DEBUG] train episode 740: reward = -16.00, steps = 1471\n",
      "12:53:17 [DEBUG] train episode 741: reward = -16.00, steps = 1245\n",
      "13:01:14 [DEBUG] train episode 742: reward = 1.00, steps = 2543\n",
      "13:07:06 [DEBUG] train episode 743: reward = -9.00, steps = 1875\n",
      "13:15:47 [DEBUG] train episode 744: reward = -1.00, steps = 2775\n",
      "13:22:12 [DEBUG] train episode 745: reward = -10.00, steps = 2047\n",
      "13:30:26 [DEBUG] train episode 746: reward = -3.00, steps = 2637\n",
      "13:35:08 [DEBUG] train episode 747: reward = -15.00, steps = 1506\n",
      "13:40:45 [DEBUG] train episode 748: reward = -10.00, steps = 1808\n",
      "13:46:02 [DEBUG] train episode 749: reward = -12.00, steps = 1698\n",
      "13:51:51 [DEBUG] train episode 750: reward = 17.00, steps = 1862\n",
      "13:58:42 [DEBUG] train episode 751: reward = 10.00, steps = 2205\n",
      "14:05:46 [DEBUG] train episode 752: reward = -6.00, steps = 2276\n",
      "14:11:35 [DEBUG] train episode 753: reward = 16.00, steps = 1877\n",
      "14:18:57 [DEBUG] train episode 754: reward = 8.00, steps = 2379\n",
      "14:24:35 [DEBUG] train episode 755: reward = -13.00, steps = 1811\n",
      "14:31:48 [DEBUG] train episode 756: reward = -6.00, steps = 2327\n",
      "14:39:27 [DEBUG] train episode 757: reward = -6.00, steps = 2480\n",
      "14:45:08 [DEBUG] train episode 758: reward = -11.00, steps = 1835\n",
      "14:50:59 [DEBUG] train episode 759: reward = -12.00, steps = 1898\n",
      "14:57:11 [DEBUG] train episode 760: reward = -10.00, steps = 2002\n",
      "15:03:22 [DEBUG] train episode 761: reward = 14.00, steps = 1968\n",
      "15:06:48 [DEBUG] train episode 762: reward = -18.00, steps = 1063\n",
      "15:13:20 [DEBUG] train episode 763: reward = -10.00, steps = 1891\n",
      "15:18:26 [DEBUG] train episode 764: reward = -11.00, steps = 1554\n",
      "15:25:27 [DEBUG] train episode 765: reward = -8.00, steps = 2228\n",
      "15:29:53 [DEBUG] train episode 766: reward = -16.00, steps = 1372\n",
      "15:38:01 [DEBUG] train episode 767: reward = -1.00, steps = 2625\n",
      "15:45:29 [DEBUG] train episode 768: reward = 11.00, steps = 2342\n",
      "15:52:14 [DEBUG] train episode 769: reward = 10.00, steps = 2157\n",
      "16:00:52 [DEBUG] train episode 770: reward = 1.00, steps = 2758\n",
      "16:06:03 [DEBUG] train episode 771: reward = 19.00, steps = 1656\n",
      "16:12:29 [DEBUG] train episode 772: reward = -7.00, steps = 2065\n",
      "16:20:29 [DEBUG] train episode 773: reward = 8.00, steps = 2577\n",
      "16:27:56 [DEBUG] train episode 774: reward = -6.00, steps = 2375\n",
      "16:35:13 [DEBUG] train episode 775: reward = 9.00, steps = 2294\n",
      "16:40:19 [DEBUG] train episode 776: reward = -14.00, steps = 1579\n",
      "16:53:19 [DEBUG] train episode 777: reward = 1.00, steps = 2806\n",
      "17:01:34 [DEBUG] train episode 778: reward = 8.00, steps = 2221\n",
      "17:09:35 [DEBUG] train episode 779: reward = 12.00, steps = 2398\n",
      "17:18:17 [DEBUG] train episode 780: reward = -1.00, steps = 2722\n",
      "17:26:34 [DEBUG] train episode 781: reward = 9.00, steps = 2579\n",
      "17:32:33 [DEBUG] train episode 782: reward = 17.00, steps = 1865\n",
      "17:40:39 [DEBUG] train episode 783: reward = -3.00, steps = 2535\n",
      "17:49:16 [DEBUG] train episode 784: reward = 8.00, steps = 2716\n",
      "17:55:00 [DEBUG] train episode 785: reward = 17.00, steps = 1804\n",
      "18:03:42 [DEBUG] train episode 786: reward = 1.00, steps = 2694\n",
      "18:11:28 [DEBUG] train episode 787: reward = -1.00, steps = 2373\n",
      "18:17:19 [DEBUG] train episode 788: reward = 17.00, steps = 1821\n",
      "18:24:48 [DEBUG] train episode 789: reward = 8.00, steps = 2325\n",
      "18:33:13 [DEBUG] train episode 790: reward = 1.00, steps = 2642\n",
      "18:39:57 [DEBUG] train episode 791: reward = 14.00, steps = 2111\n",
      "18:45:58 [DEBUG] train episode 792: reward = 17.00, steps = 1896\n",
      "18:54:22 [DEBUG] train episode 793: reward = 3.00, steps = 2675\n",
      "19:03:12 [DEBUG] train episode 794: reward = -3.00, steps = 2596\n",
      "19:14:14 [DEBUG] train episode 795: reward = 8.00, steps = 2555\n",
      "19:21:42 [DEBUG] train episode 796: reward = 18.00, steps = 2091\n",
      "19:31:04 [DEBUG] train episode 797: reward = -1.00, steps = 2420\n",
      "19:37:11 [DEBUG] train episode 798: reward = -13.00, steps = 1732\n",
      "19:44:34 [DEBUG] train episode 799: reward = -2.00, steps = 2241\n",
      "19:51:08 [DEBUG] train episode 800: reward = 14.00, steps = 2016\n",
      "19:59:56 [DEBUG] train episode 801: reward = 1.00, steps = 2794\n",
      "20:06:07 [DEBUG] train episode 802: reward = 14.00, steps = 1916\n",
      "20:11:03 [DEBUG] train episode 803: reward = -16.00, steps = 1500\n",
      "20:16:44 [DEBUG] train episode 804: reward = -14.00, steps = 1749\n",
      "20:22:21 [DEBUG] train episode 805: reward = 17.00, steps = 1723\n",
      "20:27:55 [DEBUG] train episode 806: reward = 17.00, steps = 1775\n",
      "20:32:13 [DEBUG] train episode 807: reward = -16.00, steps = 1389\n",
      "20:39:41 [DEBUG] train episode 808: reward = 8.00, steps = 2397\n",
      "20:47:20 [DEBUG] train episode 809: reward = -10.00, steps = 2157\n",
      "20:50:15 [DEBUG] train episode 810: reward = -21.00, steps = 874\n",
      "20:59:33 [DEBUG] train episode 811: reward = -3.00, steps = 2690\n",
      "21:05:13 [DEBUG] train episode 812: reward = -13.00, steps = 1641\n",
      "21:12:55 [DEBUG] train episode 813: reward = -5.00, steps = 2089\n",
      "21:21:54 [DEBUG] train episode 814: reward = 11.00, steps = 2043\n",
      "21:28:16 [DEBUG] train episode 815: reward = -17.00, steps = 1482\n",
      "21:37:14 [DEBUG] train episode 816: reward = 10.00, steps = 2104\n",
      "21:44:37 [DEBUG] train episode 817: reward = -13.00, steps = 1653\n",
      "21:50:01 [DEBUG] train episode 818: reward = -17.00, steps = 1282\n",
      "22:00:53 [DEBUG] train episode 819: reward = -3.00, steps = 2601\n",
      "22:10:51 [DEBUG] train episode 820: reward = -1.00, steps = 2381\n",
      "22:17:03 [DEBUG] train episode 821: reward = -14.00, steps = 1484\n",
      "22:22:00 [DEBUG] train episode 822: reward = -16.00, steps = 1199\n",
      "22:30:20 [DEBUG] train episode 823: reward = -6.00, steps = 2287\n",
      "22:33:32 [DEBUG] train episode 824: reward = -20.00, steps = 1017\n",
      "22:38:51 [DEBUG] train episode 825: reward = -12.00, steps = 1692\n",
      "22:44:53 [DEBUG] train episode 826: reward = -10.00, steps = 1883\n",
      "22:51:31 [DEBUG] train episode 827: reward = 12.00, steps = 2054\n",
      "22:55:13 [DEBUG] train episode 828: reward = -19.00, steps = 1099\n",
      "23:01:46 [DEBUG] train episode 829: reward = -6.00, steps = 2119\n",
      "23:10:57 [DEBUG] train episode 830: reward = 1.00, steps = 2981\n",
      "23:16:55 [DEBUG] train episode 831: reward = 16.00, steps = 1916\n",
      "23:23:57 [DEBUG] train episode 832: reward = -4.00, steps = 2286\n",
      "23:31:18 [DEBUG] train episode 833: reward = -4.00, steps = 2387\n",
      "23:37:37 [DEBUG] train episode 834: reward = -11.00, steps = 2039\n",
      "23:40:24 [DEBUG] train episode 835: reward = -21.00, steps = 900\n",
      "23:44:02 [DEBUG] train episode 836: reward = -18.00, steps = 1179\n",
      "23:50:01 [DEBUG] train episode 837: reward = -11.00, steps = 1950\n",
      "23:55:23 [DEBUG] train episode 838: reward = -11.00, steps = 1748\n",
      "00:00:13 [DEBUG] train episode 839: reward = -13.00, steps = 1557\n",
      "00:07:13 [DEBUG] train episode 840: reward = -8.00, steps = 2264\n",
      "00:12:36 [DEBUG] train episode 841: reward = -13.00, steps = 1716\n",
      "00:19:19 [DEBUG] train episode 842: reward = 14.00, steps = 2040\n",
      "00:26:37 [DEBUG] train episode 843: reward = -5.00, steps = 2443\n",
      "00:31:05 [DEBUG] train episode 844: reward = -15.00, steps = 1491\n",
      "00:36:15 [DEBUG] train episode 845: reward = 18.00, steps = 1746\n",
      "00:43:05 [DEBUG] train episode 846: reward = 14.00, steps = 2320\n",
      "00:47:07 [DEBUG] train episode 847: reward = -15.00, steps = 1368\n",
      "00:52:19 [DEBUG] train episode 848: reward = -10.00, steps = 1764\n",
      "00:58:06 [DEBUG] train episode 849: reward = 14.00, steps = 1955\n",
      "01:05:45 [DEBUG] train episode 850: reward = 1.00, steps = 2582\n",
      "01:09:28 [DEBUG] train episode 851: reward = -17.00, steps = 1257\n",
      "01:16:41 [DEBUG] train episode 852: reward = -3.00, steps = 2439\n",
      "01:23:29 [DEBUG] train episode 853: reward = -6.00, steps = 2298\n",
      "01:29:14 [DEBUG] train episode 854: reward = -10.00, steps = 1938\n",
      "01:32:36 [DEBUG] train episode 855: reward = -17.00, steps = 1134\n",
      "01:38:58 [DEBUG] train episode 856: reward = 11.00, steps = 2160\n",
      "01:44:53 [DEBUG] train episode 857: reward = -10.00, steps = 2010\n",
      "01:50:55 [DEBUG] train episode 858: reward = 12.00, steps = 2043\n",
      "01:56:20 [DEBUG] train episode 859: reward = -12.00, steps = 1829\n",
      "02:03:47 [DEBUG] train episode 860: reward = 5.00, steps = 2509\n",
      "02:11:19 [DEBUG] train episode 861: reward = 1.00, steps = 2543\n",
      "02:15:20 [DEBUG] train episode 862: reward = -15.00, steps = 1352\n",
      "02:22:01 [DEBUG] train episode 863: reward = -6.00, steps = 2242\n",
      "02:29:07 [DEBUG] train episode 864: reward = 1.00, steps = 2378\n",
      "02:34:34 [DEBUG] train episode 865: reward = -11.00, steps = 1823\n",
      "02:41:52 [DEBUG] train episode 866: reward = 1.00, steps = 2449\n",
      "02:47:59 [DEBUG] train episode 867: reward = 14.00, steps = 2006\n",
      "02:56:35 [DEBUG] train episode 868: reward = -2.00, steps = 2890\n",
      "03:02:46 [DEBUG] train episode 869: reward = -11.00, steps = 2070\n",
      "03:10:27 [DEBUG] train episode 870: reward = 1.00, steps = 2575\n",
      "03:18:47 [DEBUG] train episode 871: reward = -1.00, steps = 2799\n",
      "03:24:02 [DEBUG] train episode 872: reward = -12.00, steps = 1753\n",
      "03:28:45 [DEBUG] train episode 873: reward = -14.00, steps = 1583\n",
      "03:36:52 [DEBUG] train episode 874: reward = 1.00, steps = 2723\n",
      "03:43:54 [DEBUG] train episode 875: reward = 8.00, steps = 2355\n",
      "03:47:50 [DEBUG] train episode 876: reward = -17.00, steps = 1314\n",
      "03:54:29 [DEBUG] train episode 877: reward = 8.00, steps = 2223\n",
      "04:01:39 [DEBUG] train episode 878: reward = 1.00, steps = 2391\n",
      "04:10:21 [DEBUG] train episode 879: reward = -1.00, steps = 2838\n",
      "04:14:41 [DEBUG] train episode 880: reward = -15.00, steps = 1450\n",
      "04:22:05 [DEBUG] train episode 881: reward = -6.00, steps = 2469\n",
      "04:26:48 [DEBUG] train episode 882: reward = -16.00, steps = 1581\n",
      "04:32:13 [DEBUG] train episode 883: reward = 17.00, steps = 1807\n",
      "04:38:15 [DEBUG] train episode 884: reward = 14.00, steps = 2020\n",
      "04:46:54 [DEBUG] train episode 885: reward = -1.00, steps = 2893\n",
      "04:52:55 [DEBUG] train episode 886: reward = 12.00, steps = 2016\n",
      "04:59:46 [DEBUG] train episode 887: reward = 8.00, steps = 2292\n",
      "05:05:58 [DEBUG] train episode 888: reward = 14.00, steps = 2082\n",
      "05:09:56 [DEBUG] train episode 889: reward = -18.00, steps = 1326\n",
      "05:14:02 [DEBUG] train episode 890: reward = -16.00, steps = 1372\n",
      "05:21:33 [DEBUG] train episode 891: reward = -4.00, steps = 2515\n",
      "05:25:28 [DEBUG] train episode 892: reward = -18.00, steps = 1314\n",
      "05:32:14 [DEBUG] train episode 893: reward = 11.00, steps = 2273\n",
      "05:40:42 [DEBUG] train episode 894: reward = 3.00, steps = 2848\n",
      "05:45:39 [DEBUG] train episode 895: reward = -12.00, steps = 1663\n",
      "05:51:52 [DEBUG] train episode 896: reward = 12.00, steps = 2095\n",
      "05:59:36 [DEBUG] train episode 897: reward = -2.00, steps = 2595\n",
      "06:04:02 [DEBUG] train episode 898: reward = -15.00, steps = 1490\n",
      "06:07:31 [DEBUG] train episode 899: reward = -19.00, steps = 1168\n",
      "06:16:08 [DEBUG] train episode 900: reward = 1.00, steps = 2884\n",
      "06:23:55 [DEBUG] train episode 901: reward = 1.00, steps = 2588\n",
      "06:30:06 [DEBUG] train episode 902: reward = 14.00, steps = 2075\n",
      "06:36:13 [DEBUG] train episode 903: reward = 16.00, steps = 2052\n",
      "06:43:22 [DEBUG] train episode 904: reward = -1.00, steps = 2407\n",
      "06:49:38 [DEBUG] train episode 905: reward = 12.00, steps = 2107\n",
      "06:56:09 [DEBUG] train episode 906: reward = 16.00, steps = 2197\n",
      "07:02:33 [DEBUG] train episode 907: reward = -5.00, steps = 2151\n",
      "07:09:01 [DEBUG] train episode 908: reward = -9.00, steps = 2177\n",
      "07:16:17 [DEBUG] train episode 909: reward = 8.00, steps = 2443\n",
      "07:23:06 [DEBUG] train episode 910: reward = 12.00, steps = 2278\n",
      "07:29:11 [DEBUG] train episode 911: reward = 11.00, steps = 2051\n",
      "07:31:58 [DEBUG] train episode 912: reward = -19.00, steps = 938\n",
      "07:38:49 [DEBUG] train episode 913: reward = 11.00, steps = 2310\n",
      "07:46:50 [DEBUG] train episode 914: reward = -3.00, steps = 2708\n",
      "07:53:13 [DEBUG] train episode 915: reward = -7.00, steps = 2168\n",
      "07:59:16 [DEBUG] train episode 916: reward = 16.00, steps = 2036\n",
      "08:06:08 [DEBUG] train episode 917: reward = 12.00, steps = 2304\n",
      "08:14:36 [DEBUG] train episode 918: reward = -2.00, steps = 2850\n",
      "08:20:48 [DEBUG] train episode 919: reward = -8.00, steps = 2083\n",
      "08:27:17 [DEBUG] train episode 920: reward = 14.00, steps = 1942\n",
      "08:36:11 [DEBUG] train episode 921: reward = -1.00, steps = 2817\n",
      "08:41:02 [DEBUG] train episode 922: reward = -12.00, steps = 1557\n",
      "08:49:36 [DEBUG] train episode 923: reward = 1.00, steps = 2624\n",
      "08:57:10 [DEBUG] train episode 924: reward = -4.00, steps = 2426\n",
      "09:02:07 [DEBUG] train episode 925: reward = -12.00, steps = 1589\n",
      "09:09:03 [DEBUG] train episode 926: reward = 8.00, steps = 2215\n",
      "09:17:10 [DEBUG] train episode 927: reward = -2.00, steps = 2636\n",
      "09:23:04 [DEBUG] train episode 928: reward = -11.00, steps = 1910\n",
      "09:29:31 [DEBUG] train episode 929: reward = 17.00, steps = 2100\n",
      "09:37:07 [DEBUG] train episode 930: reward = 8.00, steps = 2503\n",
      "09:44:42 [DEBUG] train episode 931: reward = 8.00, steps = 2505\n",
      "09:49:57 [DEBUG] train episode 932: reward = -10.00, steps = 1735\n",
      "09:57:23 [DEBUG] train episode 933: reward = -3.00, steps = 2440\n",
      "10:03:03 [DEBUG] train episode 934: reward = 16.00, steps = 1841\n",
      "10:11:04 [DEBUG] train episode 935: reward = -2.00, steps = 2611\n",
      "10:19:21 [DEBUG] train episode 936: reward = -1.00, steps = 2716\n",
      "10:26:12 [DEBUG] train episode 937: reward = 8.00, steps = 2233\n",
      "10:33:19 [DEBUG] train episode 938: reward = 11.00, steps = 2321\n",
      "10:39:35 [DEBUG] train episode 939: reward = 12.00, steps = 2056\n",
      "10:46:22 [DEBUG] train episode 940: reward = -8.00, steps = 2231\n",
      "10:51:06 [DEBUG] train episode 941: reward = -15.00, steps = 1557\n",
      "10:57:57 [DEBUG] train episode 942: reward = -7.00, steps = 2253\n",
      "11:03:50 [DEBUG] train episode 943: reward = 12.00, steps = 1934\n",
      "11:09:12 [DEBUG] train episode 944: reward = -10.00, steps = 1764\n",
      "11:15:24 [DEBUG] train episode 945: reward = 11.00, steps = 2039\n",
      "11:18:27 [DEBUG] train episode 946: reward = -19.00, steps = 1002\n",
      "11:25:40 [DEBUG] train episode 947: reward = -6.00, steps = 2369\n",
      "11:28:53 [DEBUG] train episode 948: reward = -18.00, steps = 1059\n",
      "11:36:12 [DEBUG] train episode 949: reward = -6.00, steps = 2403\n",
      "11:42:40 [DEBUG] train episode 950: reward = -9.00, steps = 2133\n",
      "11:45:14 [DEBUG] train episode 951: reward = -20.00, steps = 842\n",
      "11:53:19 [DEBUG] train episode 952: reward = 1.00, steps = 2670\n",
      "12:00:31 [DEBUG] train episode 953: reward = -1.00, steps = 2376\n",
      "12:07:16 [DEBUG] train episode 954: reward = 8.00, steps = 2219\n",
      "12:10:42 [DEBUG] train episode 955: reward = -16.00, steps = 1127\n",
      "12:17:05 [DEBUG] train episode 956: reward = -5.00, steps = 2093\n",
      "12:23:15 [DEBUG] train episode 957: reward = -9.00, steps = 2018\n",
      "12:29:39 [DEBUG] train episode 958: reward = -7.00, steps = 2104\n",
      "12:34:08 [DEBUG] train episode 959: reward = -16.00, steps = 1471\n",
      "12:37:41 [DEBUG] train episode 960: reward = -19.00, steps = 1172\n",
      "12:44:35 [DEBUG] train episode 961: reward = 11.00, steps = 2273\n",
      "12:49:03 [DEBUG] train episode 962: reward = -14.00, steps = 1475\n",
      "12:55:48 [DEBUG] train episode 963: reward = 8.00, steps = 2222\n",
      "13:02:23 [DEBUG] train episode 964: reward = -7.00, steps = 2169\n",
      "13:09:01 [DEBUG] train episode 965: reward = -7.00, steps = 2190\n",
      "13:13:16 [DEBUG] train episode 966: reward = -15.00, steps = 1394\n",
      "13:19:00 [DEBUG] train episode 967: reward = 18.00, steps = 1739\n",
      "13:25:37 [DEBUG] train episode 968: reward = 17.00, steps = 1804\n",
      "13:29:23 [DEBUG] train episode 969: reward = -19.00, steps = 1017\n",
      "13:34:15 [DEBUG] train episode 970: reward = -16.00, steps = 1308\n",
      "13:37:35 [DEBUG] train episode 971: reward = -19.00, steps = 919\n",
      "13:46:35 [DEBUG] train episode 972: reward = -2.00, steps = 2579\n",
      "13:54:29 [DEBUG] train episode 973: reward = 10.00, steps = 2354\n",
      "14:00:55 [DEBUG] train episode 974: reward = 11.00, steps = 2020\n",
      "14:07:30 [DEBUG] train episode 975: reward = 14.00, steps = 2093\n",
      "14:16:23 [DEBUG] train episode 976: reward = -2.00, steps = 2866\n",
      "14:21:44 [DEBUG] train episode 977: reward = 18.00, steps = 1726\n",
      "14:28:24 [DEBUG] train episode 978: reward = -7.00, steps = 2170\n",
      "14:31:26 [DEBUG] train episode 979: reward = -19.00, steps = 989\n",
      "14:36:23 [DEBUG] train episode 980: reward = -13.00, steps = 1619\n",
      "14:43:37 [DEBUG] train episode 981: reward = -4.00, steps = 2379\n",
      "14:46:56 [DEBUG] train episode 982: reward = -18.00, steps = 1090\n",
      "14:55:30 [DEBUG] train episode 983: reward = -1.00, steps = 2836\n",
      "15:00:58 [DEBUG] train episode 984: reward = -9.00, steps = 1801\n",
      "15:06:44 [DEBUG] train episode 985: reward = -10.00, steps = 1904\n",
      "15:12:51 [DEBUG] train episode 986: reward = 13.00, steps = 2008\n",
      "15:18:15 [DEBUG] train episode 987: reward = 19.00, steps = 1785\n",
      "15:27:00 [DEBUG] train episode 988: reward = -1.00, steps = 2879\n",
      "15:30:07 [DEBUG] train episode 989: reward = -18.00, steps = 1032\n",
      "15:36:02 [DEBUG] train episode 990: reward = -9.00, steps = 1956\n",
      "15:39:42 [DEBUG] train episode 991: reward = -16.00, steps = 1210\n",
      "15:47:51 [DEBUG] train episode 992: reward = 1.00, steps = 2694\n",
      "15:52:22 [DEBUG] train episode 993: reward = -14.00, steps = 1495\n",
      "15:57:50 [DEBUG] train episode 994: reward = -12.00, steps = 1821\n",
      "16:04:31 [DEBUG] train episode 995: reward = 8.00, steps = 2212\n",
      "16:08:59 [DEBUG] train episode 996: reward = -14.00, steps = 1479\n",
      "16:13:43 [DEBUG] train episode 997: reward = -14.00, steps = 1562\n",
      "16:21:03 [DEBUG] train episode 998: reward = 11.00, steps = 2416\n",
      "16:23:51 [DEBUG] train episode 999: reward = -19.00, steps = 915\n",
      "16:31:06 [DEBUG] train episode 1000: reward = -6.00, steps = 2394\n",
      "16:34:27 [DEBUG] train episode 1001: reward = -18.00, steps = 1100\n",
      "16:39:33 [DEBUG] train episode 1002: reward = -13.00, steps = 1680\n",
      "16:46:35 [DEBUG] train episode 1003: reward = 14.00, steps = 2323\n",
      "16:54:55 [DEBUG] train episode 1004: reward = 1.00, steps = 2756\n",
      "17:00:42 [DEBUG] train episode 1005: reward = 15.00, steps = 1902\n",
      "17:07:23 [DEBUG] train episode 1006: reward = -6.00, steps = 2181\n",
      "17:11:28 [DEBUG] train episode 1007: reward = -15.00, steps = 1326\n",
      "17:18:59 [DEBUG] train episode 1008: reward = 10.00, steps = 2482\n",
      "17:25:36 [DEBUG] train episode 1009: reward = -7.00, steps = 2173\n",
      "17:30:15 [DEBUG] train episode 1010: reward = -14.00, steps = 1524\n",
      "17:35:40 [DEBUG] train episode 1011: reward = 18.00, steps = 1779\n",
      "17:40:57 [DEBUG] train episode 1012: reward = -12.00, steps = 1732\n",
      "17:44:46 [DEBUG] train episode 1013: reward = -17.00, steps = 1257\n",
      "17:50:38 [DEBUG] train episode 1014: reward = 16.00, steps = 1942\n",
      "17:57:43 [DEBUG] train episode 1015: reward = 11.00, steps = 2345\n",
      "18:04:31 [DEBUG] train episode 1016: reward = 14.00, steps = 2235\n",
      "18:08:28 [DEBUG] train episode 1017: reward = -16.00, steps = 1299\n",
      "18:13:39 [DEBUG] train episode 1018: reward = 19.00, steps = 1692\n",
      "18:21:56 [DEBUG] train episode 1019: reward = -1.00, steps = 2746\n",
      "18:29:24 [DEBUG] train episode 1020: reward = -5.00, steps = 2466\n",
      "18:34:54 [DEBUG] train episode 1021: reward = -12.00, steps = 1818\n",
      "18:42:38 [DEBUG] train episode 1022: reward = -3.00, steps = 2561\n",
      "18:46:56 [DEBUG] train episode 1023: reward = -14.00, steps = 1430\n",
      "18:54:01 [DEBUG] train episode 1024: reward = 3.00, steps = 2343\n",
      "18:57:26 [DEBUG] train episode 1025: reward = -18.00, steps = 1125\n",
      "19:05:29 [DEBUG] train episode 1026: reward = -5.00, steps = 2664\n",
      "19:09:28 [DEBUG] train episode 1027: reward = -16.00, steps = 1328\n",
      "19:15:15 [DEBUG] train episode 1028: reward = 16.00, steps = 1925\n",
      "19:22:08 [DEBUG] train episode 1029: reward = -8.00, steps = 2283\n",
      "19:29:27 [DEBUG] train episode 1030: reward = 10.00, steps = 2436\n",
      "19:36:47 [DEBUG] train episode 1031: reward = -6.00, steps = 2411\n",
      "19:43:47 [DEBUG] train episode 1032: reward = -8.00, steps = 2331\n",
      "19:46:42 [DEBUG] train episode 1033: reward = -19.00, steps = 975\n",
      "19:54:41 [DEBUG] train episode 1034: reward = 8.00, steps = 2677\n",
      "20:01:34 [DEBUG] train episode 1035: reward = 11.00, steps = 2300\n",
      "20:08:07 [DEBUG] train episode 1036: reward = -9.00, steps = 2187\n",
      "20:15:46 [DEBUG] train episode 1037: reward = 9.00, steps = 2556\n",
      "20:22:01 [DEBUG] train episode 1038: reward = 18.00, steps = 2074\n",
      "20:28:53 [DEBUG] train episode 1039: reward = 11.00, steps = 2294\n",
      "20:34:02 [DEBUG] train episode 1040: reward = 19.00, steps = 1713\n",
      "20:40:28 [DEBUG] train episode 1041: reward = 15.00, steps = 2147\n",
      "20:46:34 [DEBUG] train episode 1042: reward = 11.00, steps = 2033\n",
      "20:52:13 [DEBUG] train episode 1043: reward = -12.00, steps = 1889\n",
      "20:58:02 [DEBUG] train episode 1044: reward = 16.00, steps = 1942\n",
      "21:03:31 [DEBUG] train episode 1045: reward = 16.00, steps = 1830\n",
      "21:09:02 [DEBUG] train episode 1046: reward = -12.00, steps = 1831\n",
      "21:14:56 [DEBUG] train episode 1047: reward = -11.00, steps = 1968\n",
      "21:22:30 [DEBUG] train episode 1048: reward = 11.00, steps = 2506\n",
      "21:26:38 [DEBUG] train episode 1049: reward = -17.00, steps = 1359\n",
      "21:33:51 [DEBUG] train episode 1050: reward = -5.00, steps = 2380\n",
      "21:40:06 [DEBUG] train episode 1051: reward = 11.00, steps = 2053\n",
      "21:44:11 [DEBUG] train episode 1052: reward = -15.00, steps = 1351\n",
      "21:51:30 [DEBUG] train episode 1053: reward = 1.00, steps = 2435\n",
      "21:59:54 [DEBUG] train episode 1054: reward = -1.00, steps = 2770\n",
      "22:08:24 [DEBUG] train episode 1055: reward = -1.00, steps = 2799\n",
      "22:15:51 [DEBUG] train episode 1056: reward = -2.00, steps = 2459\n",
      "22:20:35 [DEBUG] train episode 1057: reward = -14.00, steps = 1572\n",
      "22:26:18 [DEBUG] train episode 1058: reward = -10.00, steps = 1871\n",
      "22:29:34 [DEBUG] train episode 1059: reward = -20.00, steps = 1073\n",
      "22:34:33 [DEBUG] train episode 1060: reward = -12.00, steps = 1641\n",
      "22:41:21 [DEBUG] train episode 1061: reward = -8.00, steps = 2243\n",
      "22:49:14 [DEBUG] train episode 1062: reward = 3.00, steps = 2618\n",
      "22:52:30 [DEBUG] train episode 1063: reward = -18.00, steps = 1089\n",
      "22:59:56 [DEBUG] train episode 1064: reward = 1.00, steps = 2470\n",
      "23:08:40 [DEBUG] train episode 1065: reward = -1.00, steps = 2905\n",
      "23:16:23 [DEBUG] train episode 1066: reward = 4.00, steps = 2562\n",
      "23:21:22 [DEBUG] train episode 1067: reward = 19.00, steps = 1631\n",
      "23:27:28 [DEBUG] train episode 1068: reward = 20.00, steps = 2001\n",
      "23:34:32 [DEBUG] train episode 1069: reward = 14.00, steps = 2313\n",
      "23:34:33 [INFO] ==== test ====\n",
      "23:34:56 [DEBUG] test episode 0: reward = 19.00, steps = 1687\n",
      "23:35:19 [DEBUG] test episode 1: reward = 19.00, steps = 1685\n",
      "23:35:43 [DEBUG] test episode 2: reward = 19.00, steps = 1687\n",
      "23:36:10 [DEBUG] test episode 3: reward = 20.00, steps = 2004\n",
      "23:36:32 [DEBUG] test episode 4: reward = 20.00, steps = 1565\n",
      "23:36:53 [DEBUG] test episode 5: reward = 20.00, steps = 1566\n",
      "23:37:21 [DEBUG] test episode 6: reward = 20.00, steps = 1991\n",
      "23:37:42 [DEBUG] test episode 7: reward = 20.00, steps = 1564\n",
      "23:38:05 [DEBUG] test episode 8: reward = 19.00, steps = 1687\n",
      "23:38:33 [DEBUG] test episode 9: reward = 20.00, steps = 1992\n",
      "23:38:56 [DEBUG] test episode 10: reward = 19.00, steps = 1687\n",
      "23:39:18 [DEBUG] test episode 11: reward = 20.00, steps = 1566\n",
      "23:39:41 [DEBUG] test episode 12: reward = 19.00, steps = 1683\n",
      "23:40:08 [DEBUG] test episode 13: reward = 20.00, steps = 1990\n",
      "23:40:32 [DEBUG] test episode 14: reward = 19.00, steps = 1685\n",
      "23:40:59 [DEBUG] test episode 15: reward = 20.00, steps = 1994\n",
      "23:41:21 [DEBUG] test episode 16: reward = 20.00, steps = 1562\n",
      "23:41:48 [DEBUG] test episode 17: reward = 20.00, steps = 1993\n",
      "23:42:11 [DEBUG] test episode 18: reward = 19.00, steps = 1688\n",
      "23:42:34 [DEBUG] test episode 19: reward = 19.00, steps = 1688\n",
      "23:42:58 [DEBUG] test episode 20: reward = 19.00, steps = 1684\n",
      "23:43:26 [DEBUG] test episode 21: reward = 20.00, steps = 2003\n",
      "23:43:53 [DEBUG] test episode 22: reward = 20.00, steps = 1997\n",
      "23:44:21 [DEBUG] test episode 23: reward = 20.00, steps = 1993\n",
      "23:44:48 [DEBUG] test episode 24: reward = 20.00, steps = 2007\n",
      "23:45:12 [DEBUG] test episode 25: reward = 19.00, steps = 1686\n",
      "23:45:35 [DEBUG] test episode 26: reward = 19.00, steps = 1684\n",
      "23:45:58 [DEBUG] test episode 27: reward = 19.00, steps = 1685\n",
      "23:46:26 [DEBUG] test episode 28: reward = 20.00, steps = 1997\n",
      "23:46:49 [DEBUG] test episode 29: reward = 19.00, steps = 1683\n",
      "23:47:16 [DEBUG] test episode 30: reward = 20.00, steps = 1993\n",
      "23:47:39 [DEBUG] test episode 31: reward = 19.00, steps = 1683\n",
      "23:48:07 [DEBUG] test episode 32: reward = 20.00, steps = 2005\n",
      "23:48:35 [DEBUG] test episode 33: reward = 20.00, steps = 2005\n",
      "23:49:02 [DEBUG] test episode 34: reward = 20.00, steps = 2006\n",
      "23:49:24 [DEBUG] test episode 35: reward = 20.00, steps = 1566\n",
      "23:49:45 [DEBUG] test episode 36: reward = 20.00, steps = 1563\n",
      "23:50:13 [DEBUG] test episode 37: reward = 20.00, steps = 1992\n",
      "23:50:40 [DEBUG] test episode 38: reward = 20.00, steps = 2003\n",
      "23:51:03 [DEBUG] test episode 39: reward = 19.00, steps = 1688\n",
      "23:51:25 [DEBUG] test episode 40: reward = 20.00, steps = 1565\n",
      "23:51:48 [DEBUG] test episode 41: reward = 19.00, steps = 1683\n",
      "23:52:11 [DEBUG] test episode 42: reward = 19.00, steps = 1690\n",
      "23:52:39 [DEBUG] test episode 43: reward = 20.00, steps = 2005\n",
      "23:53:02 [DEBUG] test episode 44: reward = 19.00, steps = 1686\n",
      "23:53:25 [DEBUG] test episode 45: reward = 19.00, steps = 1688\n",
      "23:53:52 [DEBUG] test episode 46: reward = 20.00, steps = 1993\n",
      "23:54:15 [DEBUG] test episode 47: reward = 19.00, steps = 1684\n",
      "23:54:37 [DEBUG] test episode 48: reward = 20.00, steps = 1567\n",
      "23:55:04 [DEBUG] test episode 49: reward = 20.00, steps = 2005\n",
      "23:55:32 [DEBUG] test episode 50: reward = 20.00, steps = 2001\n",
      "23:55:59 [DEBUG] test episode 51: reward = 20.00, steps = 2002\n",
      "23:56:27 [DEBUG] test episode 52: reward = 20.00, steps = 2006\n",
      "23:56:55 [DEBUG] test episode 53: reward = 20.00, steps = 2006\n",
      "23:57:18 [DEBUG] test episode 54: reward = 19.00, steps = 1688\n",
      "23:57:39 [DEBUG] test episode 55: reward = 20.00, steps = 1561\n",
      "23:58:03 [DEBUG] test episode 56: reward = 19.00, steps = 1686\n",
      "23:58:30 [DEBUG] test episode 57: reward = 20.00, steps = 1996\n",
      "23:58:58 [DEBUG] test episode 58: reward = 20.00, steps = 2001\n",
      "23:59:25 [DEBUG] test episode 59: reward = 20.00, steps = 1994\n",
      "23:59:53 [DEBUG] test episode 60: reward = 20.00, steps = 1991\n",
      "00:00:18 [DEBUG] test episode 61: reward = 19.00, steps = 1683\n",
      "00:00:49 [DEBUG] test episode 62: reward = 20.00, steps = 2003\n",
      "00:01:12 [DEBUG] test episode 63: reward = 19.00, steps = 1687\n",
      "00:01:40 [DEBUG] test episode 64: reward = 20.00, steps = 1990\n",
      "00:02:08 [DEBUG] test episode 65: reward = 20.00, steps = 2005\n",
      "00:02:29 [DEBUG] test episode 66: reward = 20.00, steps = 1561\n",
      "00:02:53 [DEBUG] test episode 67: reward = 19.00, steps = 1683\n",
      "00:03:16 [DEBUG] test episode 68: reward = 19.00, steps = 1684\n",
      "00:03:39 [DEBUG] test episode 69: reward = 19.00, steps = 1686\n",
      "00:04:06 [DEBUG] test episode 70: reward = 20.00, steps = 2001\n",
      "00:04:30 [DEBUG] test episode 71: reward = 19.00, steps = 1683\n",
      "00:04:51 [DEBUG] test episode 72: reward = 20.00, steps = 1562\n",
      "00:05:19 [DEBUG] test episode 73: reward = 20.00, steps = 2004\n",
      "00:05:40 [DEBUG] test episode 74: reward = 20.00, steps = 1566\n",
      "00:06:09 [DEBUG] test episode 75: reward = 20.00, steps = 2001\n",
      "00:06:35 [DEBUG] test episode 76: reward = 20.00, steps = 1564\n",
      "00:07:00 [DEBUG] test episode 77: reward = 20.00, steps = 1564\n",
      "00:07:23 [DEBUG] test episode 78: reward = 20.00, steps = 1566\n",
      "00:07:44 [DEBUG] test episode 79: reward = 20.00, steps = 1565\n",
      "00:08:12 [DEBUG] test episode 80: reward = 20.00, steps = 1996\n",
      "00:08:42 [DEBUG] test episode 81: reward = 20.00, steps = 1992\n",
      "00:09:05 [DEBUG] test episode 82: reward = 19.00, steps = 1685\n",
      "00:09:35 [DEBUG] test episode 83: reward = 20.00, steps = 1993\n",
      "00:10:02 [DEBUG] test episode 84: reward = 20.00, steps = 1993\n",
      "00:10:26 [DEBUG] test episode 85: reward = 19.00, steps = 1685\n",
      "00:10:55 [DEBUG] test episode 86: reward = 20.00, steps = 2003\n",
      "00:11:25 [DEBUG] test episode 87: reward = 20.00, steps = 1996\n",
      "00:11:49 [DEBUG] test episode 88: reward = 19.00, steps = 1684\n",
      "00:12:16 [DEBUG] test episode 89: reward = 20.00, steps = 1990\n",
      "00:12:44 [DEBUG] test episode 90: reward = 20.00, steps = 1992\n",
      "00:13:12 [DEBUG] test episode 91: reward = 20.00, steps = 2003\n",
      "00:13:41 [DEBUG] test episode 92: reward = 20.00, steps = 1996\n",
      "00:14:06 [DEBUG] test episode 93: reward = 19.00, steps = 1690\n",
      "00:14:27 [DEBUG] test episode 94: reward = 20.00, steps = 1565\n",
      "00:14:56 [DEBUG] test episode 95: reward = 20.00, steps = 1992\n",
      "00:15:22 [DEBUG] test episode 96: reward = 19.00, steps = 1684\n",
      "00:15:50 [DEBUG] test episode 97: reward = 20.00, steps = 2005\n",
      "00:16:11 [DEBUG] test episode 98: reward = 20.00, steps = 1562\n",
      "00:16:34 [DEBUG] test episode 99: reward = 19.00, steps = 1686\n",
      "00:16:34 [INFO] average episode reward = 19.65 ± 0.48\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXkAAAD4CAYAAAAJmJb0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8rg+JYAAAACXBIWXMAAAsTAAALEwEAmpwYAABJaklEQVR4nO2debwUxdX3fzV3ZZNFVlkFEQSCG7K44MoSMUGjScwbnvg8iSEmmsTsmEXzxCXE5InGPCaGxESfJGpwNy6oIIqKIiCgLJd958K9LHdfZ6beP6a7p6enqrt6m54793w/H7gz3dVV1T1dp0+fc+oU45yDIAiCKExiUXeAIAiCCA8S8gRBEAUMCXmCIIgChoQ8QRBEAUNCniAIooApjroDZvr27ctHjBgRdTcIgiA6FGvXrj3KOe8n2pdXQn7EiBFYs2ZN1N0gCILoUDDG9sr2kbmGIAiigCEhTxAEUcCQkCcIgihgSMgTBEEUMCTkCYIgChjfQp4xNpQxtpwxtoUxtokx9m1tex/G2OuMse3a397+u0sQBEG4IQhNPg7ge5zzMwBMBXAzY2wcgAUAlnHORwNYpn0nCIIgcohvIc85r+Scf6h9rgewBcBgAHMBPKoVexTA1X7bIojOQDyRxOI1+5FMUhrwzsKz6w6goTUeSt2B2uQZYyMAnA1gFYABnPNKIPUgANBfcsx8xtgaxtia6urqILtDEB2Sv727Bz986iP8a83+qLtC5IA1e47jO//agDv/vTmU+gMT8oyx7gCeBnAr57xO9TjO+SLO+STO+aR+/YSzcgmiU3G8qS31t7Et4p4QuaC6vhUAUNMczu8diJBnjJUgJeD/yTl/Rtt8hDE2SNs/CEBVEG0RRKHDtL+0alvnQDfTdCsLJ8tMENE1DMDDALZwzn9r2vUCgBu0zzcAeN5vWwTRGYixlJgnk3znoFET8t3zVcgDuADAfwC4jDG2Xvt3JYCFAGYwxrYDmKF9JwjCgZimyidD0ORf+qgSr28+4quO5Vur8Ny6gwH1qGPQFk/izhc3o7apXVqmqr4Fv3x5CxKWp3MyyXHvkgocqmkWHtfYlgAAdC0NR8j7rpVz/g7Sb5hWLvdbP0F0NpimyYdhrbn5sQ8BAHsWzvFcx3/9bTUA4OqzBwfSp47A8+sP4uF3dqOlPYG7r/mEsMyPnvoIy7dWY/rp/XDBaX2N7Zsr6/CHN3fi/V3H8Mw3Lsg6rj2RBACUFsnEqD9oxitB5BmajCebfB7Rnkj9FlYt3U2Z1nhSuF03z4X1a5OQJ4g8I+xBT7iHa7+G/pYlQraLGeY3yX7o+8P5xUnIE0SeEfagJ9yjC2gbGS+FQTe/iX/PWCw88xxAQp4oAD46UIOZ971lRCmo8IMnN+BPb+301e6yLUdw7R9X+p6ZeqimGZf95k3DMacP+iCia5ZtOYLP/OFdxz6+8nElPv+n93y3t3jNftz4qPvV3f7vvT345uPrfLcfBN/913o8unIPEkmOqx98F8u3VhkSOCYQ8hsP1mLGb99CfYv4/lu0InWfOQnxsKKpSMgTHZ6Fr1Rg25EGrN9fo3zMk2sP4JevVPhq95bH1mHt3hNoiSd81fPE6v3YdbQRi7UZrizA6JpvPr4OH+6rQVO7fR+//s8PsWr3cd/t/fCpj7B0i/vonduf34R/bzjku/0geGbdQdzxwibUNLVh/f4afG/xBkMAxwSq/K9f3YrtVen7z/qrPbc+dV6y39PwwYRkoCMhTxAe0R1sooHvh1h61PtGlyuqPSRnr5gkV/+tZdcwIRPyIHMNQeQlYdnMw7DJqz6HSManMV8KPzZ5oz7JtY2FHE1FQp4gLHDOEU+Iw93MyDQzv8QCjJM3okIUdfkgHyzxRLIg3gwY0gJY9TqKcDLXkE2eIHLE9xZvwJn//RraJHHNOvqY5RwYseAlLHj6IwDA5f/zJq7940oAQH1LO0YseAmPrdqXcWxtU2r7k4JM
k0EOelkdE+54VbjdXPzLj6zGpLuW2tb/8xc2YcSCl4x/Rj2cY/I9yzD/72vddtmR6fcux/WL3DmJr1/0Hqbfu9xTexzp31rkeFWuR/JbmO+jMCAhTxAWnll3EI1tCTS3qTlUdW35idUpgb2zuhFr954AAByubQEAPPzOroxj9h1vAgA8+t6erPqYkbsmuFFvdepZc5eLnL1vVFThaEOrbb2PrNwj3J7kqSyaflMoiNh3vAnv73LnJH5/13HjmnvBsMkrSHnZryb7Pbnxl8w1BJFTghx0dpNorARqo1WsIp350n+TqXo6vplGhyFcmzxp8gQREarmErvBKdtl9wAJcsar3o6TAAkzX05HxXwtkiHa5NO/EWnyBJFTVAedSimZaGhuS6C+JTOzYVBx8kfqWowHlVNNdhE9TW3ul6Vzaq+lPYGapuxFMvxMLKuqa0FlbWpCWSLJUVXfklXmSF32Njeo2uTb4smsRV8qa1uEC8Fwxd/IKyTkCUJCIJq0QyU7qxvxt3f3ZGwLQqvesL8GU+5ZZptQy0zMxg8w7ZdveO+IhGv+sBJn/eL1rO1eI5YO1jRj8j3LMO2Xb2D51irc9/o2TL57GaosQn3KPcvw/q5jSnWaTTP6w0fJXMOBbz7+Ic65M/v8PvX7d6SHUe4agsgxqmPOTuNPJ7ZSbzetVasfY2V7VUNmPxztNfI2a5vlOdRlODW3pVK8QqjqQ8nKAZNTddPB2lQqAgBV9dmO482H1FYnNZ+D/lFpMhQ4Xt0kdjgfFOSU138bsskTRI4JwkaannHqxvFqn9AqDIzeRWyTj3sU8nqaXwAoisVQpNlVRPW5bYExk00+4NnNQPoeoTh5gsgxqmNOySbvQjaEsTKUU0125hpv7Xmrx6sm35ZIh7sWxdLn47U+K+ncNYFUl4ERQknmGoLILermGvf77I4JYsarVWA4R9ek/kad3tizkDdNXIsxZmjyovPxIqdlM15X7zmuPJ9CXnfm36AJZ1FBgigAIhN4IU9zt2kyMGuN10sXTzqnkxBhXnWpKMZQFLAmL5rxWlXfgs8+lD3z1u2562895HgliByjPORs4+TdD9x0nHyAg14xTr4QNPmiGENMk2x+QjLN1z9tk0/v96vBG+1QCCVBRIN6nLy8nK6YunHYBT37FHB+YITRphfiCW8dMDteY4yhWJPyopBMLy2kZ7ymf8fAZgcbbZAmTxA5JYgxl54pqY6hheZS4hqpFHLXpAiv59xmWrglxpiRY8ZrtI4V/YGvlk8++ziVA/yuMCaDhDxBSPhg93Gs3HHUsZzdODZrknuPNeLZdQdSx0jKH2toNTJW6mO+pT2BRSt2orE1jkUrdkrTIFccrsOSjZXCfSt3HsMHlpWfnl57wPjsFF2z+2gjnl9/UNLrbL78yGrlsodMseOPf5CZlbO2uR1/fWe3UFg+8m56u1mTL44xFGmy+E9v7US7QtpoEVsq67VPzBQnn96/STHe3om0Jh9IdVmQ45UgJHzvyQ0AgD0L59iWsxub3GTLver376C+JY5rzh4iLf/tJ9Zj9Z4TGcf+4c2deGDZdvzprV041tiGnl1K8PnzhmUdO/v+t6X9/cY/P8za9r0nN+Az5wwGY8wxumbWfSvQlkhi7lmDbc42zcqdarNKAeALf37f+PzQWztx6xWjUV5SBAC4/fmNeH79IYwd1APnj+qbcdzP/70ZZw3rjbOG9srQ2GOxdHTN+7uOZ6V5Vn2ruuGvHxifRTNeb34s+5pacaHIo2tpkWLP3EGaPEGESMKwyUO60LMZc24TffA3aMcd0/Y1BeTwM7fhZJNv86gNq3BUMCtV50RTarZtqyS3f0Jzeph9DuY4eQBo9JB7x4qbGa+i4+zLcBTFGBZeO9F1v1QgIU8QPrGzu7rNXsgzPvOMv2FgXbs0apu8rA+yqydyaJvj5EX1eXO8qh/ltn7OvcXuq0JCniB8YjeoZc40FYecx5DxVP0uy0U5GcpPqgDRwykV
Qhms2EzHybvU5BWuJ4e/PPVOkJAniBDRZbw5MZXdwDeP9VwI3HQTweWwzyUi2VjE0pOhgiL9xuPuOCVzDfeXp94JEvIE4ROV6BqRrd0JIxe8B8mrKjKsWTKjngwFZJqnnDRhkWbNLOaaQPokiJOXl3V3DTnCtdeQkCcIn9hOhvIxGcdPwir1NlJ/047X6IW8CJlwFW0ujgUv5L2+GChdTrLJE0THRWSTD3LFqaDQhVgUMt4q4Nz0QaTJx2LIMNf4fXCZm1C1sac/k02eIDo+NuNY5HdVFTnSNUEDFMTW6JpcJkWT4aYLMYEE4xyBOl7DfvBxzskmTxD5jFkGjLztpYx9ooRbqgt/+xEuP3zqI6VyVnONG5v80s3i1Y+cuPWJdQ594ph9/wo8uHyHY13NbQmc8bMlWF5RZWz7yqNrsHJneqay9ZTufHEzRix4CTf9fa27jsN9JJDqZKgwNXma8UoQAWKV6aLXe9krvLWoLnDDtJOnQyjdx8n/akmFpzafW3/IsU8Vh+tRcXgrLhrd17bs3mNNaG5PYM3eE1nbnViy6bBjGcC9ucYtHGSTJ4i8RjV3jbm87BBmKZexL4xViSwpdN1o8l4X3XbC3WSoULqg3L4I1/nkeTjLCuqQkCcIn9hH16jXw1im8Dc0eX2/0V5wWCdDuRFQgWVNzPK8qh+qtrB2dCiZa8BJkyeIsEgmeWgpXvX6rdilvzWbA/SP1vKc88BXPNIdf06avLl/ql1w2183aRxU3jzUl3F07qe+V/V8ROcSTySzf2eKriGIcBj545dx3UMrfdVhJ0REQmjCHa/iSG2LsHzF4Xrjc5JzHKxpzsqieNdLWzDqxy9766wFq7nGSXQtePrjjP6p8Ltl2zHqxy+jSTFR2J0vblEqB6SydvrhmQ/T6Za/+n9rlK5rY2vcoVz6uvz4mY+z9p72k1cyriNANnmCCJUP99X4Ot5O1Mk0vv0nnB2DHMDOqgbjexh2W2tOFifB/a816Xzvqm9A+kOqrllNyD9tErzpmaZKhwqxezN46aN0/v2lW6qEZax+khNNbcJyImROZvN15Jznv02eMfZXxlgVY2yjaVsfxtjrjLHt2t/eQbRFEPmGXcSFbFeRKMDbod4wxECWvd8xjUD6s1vHq0yO2Z2XkXYhVF1XnTAMex1lMtQjAGZbti0AsIxzPhrAMu07QRQcbqNrABgrF9nVk7SJwgkKnjbKC/tgpXtZOupaNcV8Hsyv8k06xJQHPjmqQ6Qa5pyvAHDcsnkugEe1z48CuDqItggiTHZU1TsXsiAb9A2tcam5pqgoe+hZTSVJzkPPJbP7aCPa4knlGa89ykuMz277Vt/S7rp/QWDXzXDCUl2WRwcw10gYwDmvBADtb39RIcbYfMbYGsbYmurq6hC7QxD2bKmswxW/XeH6OJnN95oH35UKwhLBtHurkLfG04chB6576D3c8cIm5Rmv5iXqVM01ejFP1zaAZ1y+v0l0CE3eD5zzRZzzSZzzSf369Yu6O0QnxrygtBtkgmh7VYNUkxflVrGaP7Jt8uGIglW7jynHyZsfNGGGntq1GyVqce8u60THsMmLOMIYGwQA2l+x65ogOjh22q9MDorGtFWoc+M/m4MCIMaY8QBxMsGYHzSBzYXKFwmuQBhLMaYuecc017wA4Abt8w0Ang+xLYLwjVfTgJ2wk2aSFGyzav25WsCjiDFTWgP14/JhgRElbPvpLFwzc9f47042PP81ecbY4wDeAzCGMXaAMfYVAAsBzGCMbQcwQ/tOEHmL9/Frp8lLhLxgu1XAWtd4DUsOmE1HbjTVprZEGN3Ja1SujqfcNZ56o0YgWSg551+Q7Lo8iPoJIhd4jWSxDaGUhBmKjhFG1+TAbVgUAxLJMPPJ+3h85tnLQhj9CTvVcOSOV4Lo6ARlrhEuFehBqLh9WKVs8t6ODQIVAedHBubZcyKLVIKyjmmTJ4i8RCbIvAoD2yyUkieASKCLbPLmYqranls5Hbbj
M2ptPMj2w3K8kiZPEHmM1XaesU8iE8TmGvsyqtqeWzFknn3rdGzQwihfFw43Y77uqqmD3cDRAWzyBNGRkA1U2fZ9x5rw8sZKnD6gO5JJYNfRBswaP9DYf+UDb0vbkk0YulewqpJV699e1ZAhLprbMx2dnHMkOXDf69tw40WnGtu//g93y9oVxdLRNWHYNuwSeimnAUZq2cABJ5W7bn/ToVrpPsaADftrUHG4zqbtzOiXxabkYsLyXhyvIaryJOSJTodNOjHh1nkPr8K+45lZIx9duVetLcmIr2vJzsgoMuHsrG7I2pauG1i25Qj+d/kOHDRN5HrN5dqrKgtvmNt0ix9nrq4V7z7a6LhsoIzlW+1n0s998F31/nCO37/hvPasG8J2rpO5huh0SG3ykrHW2JotkBsVc6O7WSxDpPUX2QjgJOdojadsRW1xxWxhAsxCPhfRPGZUTRVx1WxoIRC6RYls8gQRLF6mnXvFbhUoKyInbYkoXaUGR1r7F6VJUKUopm5zjsomn8gT030oIZQgIU8QgSK1yYfQlpvMiyJBZie8k5ynhbwPIeHnAeEX1WseloNW5cy55LNKeRU4pxBKgggUmUlC7pD1LmAe/8DeSWfm3xuybc52TXOenmxlZ9ZxIiO6Jk80ZitBrWnrBdG6u4HWD9LkCSJQ5Jq8+gjOle7rtOqUbuLxo40nTTbhXItSp8gS/fQjlPEA0mGUocXJB15rGhLyBKGRSzOOKnZtc6TNNX40+bhdoH/IOAlNrlguVyjFybtU91OaPJlrCKJTYp8yIb1fYclYKfGE2RyR4+gaxebCyl2vIlvDviQpm3x4kJAnOh1uNfYo7dT25hpuhF360QTjSW4yR+QnEUZQgiP9JhHG9eFAqPYaEvJEp8P1tPMIpbxd00mTTf6xVfs8t1FeksdiQDt/1aUGQ+mCOYdQSJ5X0uQJIkCCiKLJ1WpGtg8kHkzUydlDe6erzLEsVXU65kuOm3A0+Y67kDdB5CVBDNRcRdc4pTEOYnWmzAdJrme8qrUX1ipUKvHpGVcnDEWeNHmCCBa3aQ2ixDZOHsEIv3w8bytR2uSTSR7qmwSlGiaIgMllWgO/2C8SzgOJHw9bU7VtWzW6Jk+eRCpvHq6zUNKiIQQRLEFMhsoVTpOhgrDJRxo95LBfz8IZVgilChymeP2wzDWkyRNEgASgyufI72qrqXPOAxF+5odb7me8cttreawxlYs+yuga80VRy13jfjJUmJCQJwiNPLEIZOBskw+iEVNagzy8BkD0bxt6++Fp8mSuIYjAcJ2gLMS+OGG7fqxpMpS/NqJDte2wEpSpzXjlpslQ4QRRUnQNQQRIPgpzmbBxmgwVRNRHRpbFXIdQdjDHa1CYf2+yyROEIm7WC3VXb/gCRrYEn1Nag6BCKJnpc05RnNMfpZA3m2tUbh730TUk5AkiUORx8m5GZ7CjUlabveM1mLbz1RxlJtp88uZsmMGTSHJfWUSdICFPFAyqAiMf4+Sl5hq7ZccD6lgyw1xjT67SOVgJS8Yr2eTN0UcBXXRzs0nOQ12di4Q80enIS5u8RJd3SmsQxCQa8/XobKmGVeDcXXSN20tImjxBKKJuk3cn5XMh97w4XgvBFal6DtE7XtXb31JZ51hGfyN6Z/tRbDxYG6omXxxazQSRr7ic8ZqLiBO5kLcPoQyCaFMp20+GMsqF3xWltlX68Zd3divXPe/hVQD8rezlBGnyRMGgKgjcR9dkbwt6TMrNNeHb5FMBLizQOoMmtDh5RXNX0JOhrK0WkU2eIMJHnmc+/LZlY9zWXKOoBTsR9WxSFSI115ht8iG9U5DjlSAUUDU7eMkSGDayqBW7ljmCCeTMzF2Tn47XSEMoM6JrwmmjiOLkCSI45LZ3MbmQL/I4+VzY5KObDMUVp/RHKOO1OPlgO2B9ppO5hiAUULbJuzTL5MIx6Sm6pgAmQ6kSqXMYZpt8SOYacrwSRHDIhqlMWxNpkUEPSam5JkeavOhz
TlDM2xKlucZMUL2wOnxJkycIFVTj5PMwhETqeLU5hgeW9ER9xmvQ1y4jL4wNibB+MuUslCkCcwBb2iXHK0EESJRRNDJkmrxTCGUgIt7FeQcd5dIRZrwmXc549QLFyRORsWbPcZx391LUtbRH3RVHVJ1jHx2o9d1WVX2r7zrMyIb4P97fJz2m4nAdfrdsu++2M3LXcI5r/vAu/vH+XklZ381loSLf3tlxNPiGAdQ1u7uv/7lK/nu4oS2eRFNb3Ph+uLYlkHpFkJAnbPnt69tQXd+KjwMQjPlCdX1qQBVbXpEjzV3jQZNbtGKXrzZH9u0GIDOfOQewbl8NfvrcRuExwZtrojWdrd5zXKlcGP3cerje+PyBYj+8QEKeKBhU5U9cU0cH9iz3VkEIeHlbb0skfbX5w9ljcErPclfiy+4SnWK9nj7rywVlxUWRtd0W9/f7qRK6kGeMzWaMbWWM7WCMLQi7PSJYoh6EYaBHauSVJu/hmNZ2/0KCMZb5GztcBLvd7R5sOVHfXqXFiiIwhI76fUirEqqQZ4wVAXgQwCcBjAPwBcbYuDDbJMIhmizi7nCryVvD1qJ07kWhyeu/qnlCkpNZws7x2u6xP0GkS/ZKaZGaCAzjzigUTX4ygB2c812c8zYATwCYG3KbRIBEbTMNg3hC1+Qzb/8oz9TLZJjW9oTvdhmDqxO3e5C2exBaUYezlqlq8iFQKEJ+MID9pu8HtG0GjLH5jLE1jLE11dXVIXeH8EwHUOVVxUUimRpcVrka5XwbT0I+ACHBmCWVrsM1sNXkvZhrItYhihWTxoTxMCoIcw3EoiHjanHOF3HOJ3HOJ/Xr1y/k7hCEXBhFrVW6JR7AU4mBZZy382Qo+T6v5pooCWvxdxUKRZM/AGCo6fsQAIdCbpMIkI4k91SFtGyKfLSToaJr181p5yK/fWehPbRpvJmELeRXAxjNGDuVMVYK4HoAL4TcZoenrqUdxxqCnWzjlyidY064nZik2+StQinKnOWRCXkADS1xHG9sA+AsqP1eopqmtqz6ojp3N+w91hR4nUdzNMZDFfKc8ziAWwC8CmALgMWc801htlkITL1nGc69a2nU3egw7KhqAODeJm91Knc0x6tfGEuFUC6rqMIeTYg5Odrt9p85pKdjm2f94nXl+nKBauvr99cE3vZrmw8bnwee5H6OgSqhu5Y55y9zzk/nnI/inN8ddnuFQFOb/6gJQo5uk88rTT6CNr3kvpG5Ab44ZRi+dvGonPQBAPr1KMva9oXJwzy0H91vbs5X8+AXzw6tHZrxShQMyqsM6eYaj8eHgZe0BsG0m/ndq7nmlF5dpJk0w+CUXl2ytvXuWpK7DgRAfUs6d01pUXgzb0nIE7Z0LF+aWm/1qBSr5h6lVheFjNfNNWa8phpOVeP+JLxecVFLHcG2b6a+NS3kw+w7Cfk8Z/nWKjTngfkmXweQGwf1B7uP42hDK+KaTd4qYaLU5KOwyQPuxbLdJfKiyXt9sIoul5drGKUS02DS5MOEhHwes/VwPf7rb6vxs+fFGQEJ4PpF7xufneTF5/70Hq7940pDk7cWj3IyVFTP0Cy56HARZX4LBubJ5BSsJu/lKeOxAwHQbJqxTJp8J+WEFm6273jw4VvK5Lm9ZrsWWaPK3mNNaZu81VwT4clGF0Lpzlwjy+/DWG4fVCKBrtr+VRMHKbcTZtSLmTBDlEnI5xlmwaPHc5coTr0Ok+h74IyqiI4bIZSZRKvJ54fj1SlJm0zRZ4K6VPBqIvNjk3ej8efSmRwWJOTzDPMsOH2auDWRFuGPuCSEsqPlkw8DR8erzT5v58A9mVn82OTNpZzON1dRT2Su6UQYTkGklxrzosm/+NEhbAhgAoduwnj6wwMZK9nkI3Yy2vyGlDBs8ultD7+zO1pNPorJUIJ2na6B1CbPvL2NeNfkvZtrMtu370CufhYS8p0Isya/dMsRAN40+VseW4e5D74bWL8WrzmAWfevCKy+XGMWXvobkul5
ijtf3Iy9Ifo+LjytL26YNly6PzLHq+W7k9Cz7p49fiAG9SzHp88cjEkjertu3/NzVXDB1M01LprJlZAnm3znIS7I5KeaDjUMOlLSKTvHaVKgyVsRXfug+MeNU3C9zYzMqCxyboWYVZMf3rcr3rvtcgzsWY4e5SV4+VsXBdg7OX6ia9yYa3IV2kqafCdClD62RHH1GkKOWTilbfLWyVDh9sFuIOeN49VJk3dZnxOezTU+Lpc7x2ueOEt8QNIjzxDl5LauRUqIsbfJpz/HJWkNws5dYyfI8yaE0jGtQWYB6/GuhbxHg01QD0Wn883VzxJmO8Uh1k14QJRjupg0eV9sPVyPu1/eYnyXRdeEbZmy1eQjkvJW/cHZ8Wq/363w/ehAravyRjt+bPI+2wkDMtd0IkR2YVLk1ZDJn+8/uQErtqWXlpSmGg5Qk798bP+sbXY/YxQ/MWPZs1Sd3mac9rsVVkcbWj0JOKGQV72KpmJObxK5e/iS47XTINLko/R9diC/qzK6ucaqlcq0VC/ZDb91+eisbXbyIooHOeccRS4btsp46zm5zoUTZAilsoxPF3RqP1e/C2nynQhznLxOR1t7NCpk18mqrUnNNZLjvTjfxIfY2eSjeV0rsmryPicLuI7W8dheri4XOV6JwBFq8iTjAyUdQmnNXSPGy+X3o2nmCsZYVuim/wlh7k4yyAloqi3n2+8AhGuuIyGfZ3z2oZVZ2/wI+V+/WoHp9y73fHxHe4v484pdGHf7koxt1lPQI5is29/cWg0Rsrh6O8Q2YzlR+V2s5hq30S7WbnuJuw9K6HpxvDqba3IVJx9eOxRdk2eI5Imf7IgPLt/pozcdD3MUjYyEJNWwDCeTQllxDK1x54lUdgM5ijh5hmwh5veZ7vYsvIatirNQKk6GyssZr+FBmnwHIAhl2os22tGQXSfrdtlkKBkJm3Izxg1AWbHaMMo3KwGHQJP3ebO51UiTnHt6wAV1LZ3Ol2a8EjkhCPHsdRWaQnw06GGqypq8jSCwXw4vE1uBEZW5xuXyf1b8RtckuTcB5y9O3hRd41C2EMKXSch3ADhPmQyq6lpwpK7FUx11Le3G5+a2hGGCaGrLzRJkQdLSnhC+mZjNWmbhay0ZN85dbVlFQcBTeh8XCwq3jtdI4uQBxCxSzO+sX082eS/teDjGONZ0sL4wj1LhEKEEZZ0czjkWLqnA5HuWYco9y/DSR5Wu62jQFg1uaovjjNuX4N5Xt2LjwVqMu/1VvPyxvL589LuO/dkSzP+/NbZlzP22atu6kG9TsKMD9uYaFTPYsD5dHctEEarX/6Sy7BBK3zZ593H3XpyOlbXZyo5ygjJTsZZ2+3uA4uSJnMABvLP9qPF97/FG13XoE4DqmlPC/ukPD+Djg6kp5W9JokrymWUVVVnbMgS7zbFu/RO25hrJdvOg/fctF2Zti5p/zZ+KiUN6CWzy7urxm7smmfSmyftbEpMSlBERIUt1yy1hZl60a10b1f8Wx9LD0y56p6Pe437NDt3L0oFnqouRmDFft57ajFnb6JocX+czh/YCkG2usZ5Pj/JwA/ASnHuyvYgOCeMS5iypAWnynYM2TciXWhKSWcWIlwgIXXvVF7GOMaZ0Y+WjuUZGponGX125Fhi5FvJ6e9alCqwPR7fd8pJq2JNNXhRCqep4zcMQyjAhIZ9HtGr2wbISi5DnwJbKOuO7bm1ojScytuusFyz7pw/eLYdT5Q/WNGfUL2LzoTql+O98xCysROfXo8xBQ1Uc3PKFrd06XqORJtmafOZ+Jzt3VnSNlxBKL2kjFLepHisv2/EnQ5GQzyP0mZhlxUUZ23dWN2Q4xPSB+LPnNuKTv3s7I+ImnkjiasGyf7om/7W/rzW22TmdDte24MoH3hY+RPKVQ6YHlxPlpUXOhTSGnyx3nCZ5dnjNRaP7us6SmHNNXuuL3xDK7Hrd4X0ylKfDAORnWDBNhuokmO3lZo42tGZ81wfGh/tqAAA1TenwSFHu
G0A8a7O5PRVCKDqiur5VsDW/qWlOX4cMTV5whk5RE+bdo/v3kJYTyai//ed5kuXp7NsEgLED5W2Z+ey5Q2z3Tzm1j1I9VseritC9YdpwzJ8+UrjPfQhlkOYatZry0QRJNvlOgi6HnSIeuOVhYI4WaZcEdYvCAO3WNNUfAB0V5xWO7PcrCwzBA0S2yItKjapLPTpFfXR3Mkfp9Sjea2YYY+hWmqo/K3eN6xBKb7lr/EyGyk9dPjxIyOcR+oDKThoF4fcigZCPSzR5UdigLOUukL9CXtXp7GSTDwqpTd6l0ditTdapuOr+LHONwrq39itc2bdrJZHk3lI5K24TkZeafIgGG0pQlkdwiSZvNZ1sPFiLtXuPpzV58yLVEu387e1H0c2i3T3+wb5UuwLNZt2+E+46nyPMD6sTjZmzFVXj5FUwyx07GWQTfJpdj51NXrlnzn1yQ7a5JnO/bDavLOzWS1qDisP1Lo/y56h0I+T9JAd0A5lrOglJiSZvZfnWalz7x/eMV+2EyUTTLpno8/A7u/HZh97L2HakTnt4CA6pqHQ/8HKB+fRufuxDaTluetYFMUxFy/mpVG7+Jc0D2WpO0fedPkDNJu8s5Oz36xp8VhZKhau1s7rB3BE3zWZhfVCrImzG0pcvX3Cq8NhcCW43kOO1k6ALMNWp1GmbfHqbnZ3dDY15mtPGbIY5aBNNE/RA/tlV46T9EWq7TP+b/jHNP+vG/56VWV7727OL2lKDjiLepsCehXMMBcHqAsjS5EW+HJtkPm7NDm0e71dxquFMbp2RvQRj3kKafOdAF2Cq0+51Lcw86GTRNXaIjqjzmLUybOxs7WZxmxly6l/gy+zG0rQGlr+A04zXgG3yivU4zXgVnV9RLObOF2FDkCGUfjNiishH+71bSMjnEdylkNfNOmbFyk7LckO9KWtlPmF3beySkvlFJrzkaQ2yzSG2M15d9sfJWakqbLMdr7D9DmSG+GZH17gjoNu1w0NZKDsJuvyKuxTyZsG+atdx1+0+u+4gRix4CdPvXY77l24DANTngSbPOcd9r2/D7qPphGwZGrqNSSaznHusQ86q8SrXrejAdVPGUq0vHMN1BWdo5zNy+0bidTEbUReyk6WJ+xLV7GI7yPHaSXBrrjE0edPIvOOFTZ7b33e8Cfcv3Y54IomWPAihrG5oxe+Wbcd/PLzK2Oa0FJ+Ok03eqZbS4hhKTSs+yeSaPK1B5t/U5yDNNQ6avKIg6921NOO71Xwi0+SdzFSqeLXJC9tmwF1XT1Aqp0pQ74P/b8ow2/3keO0k6Aq5ql1d5HgNggTn+bFcoNYFc953VRtuRjEPp8LA8MD1ZxvfXdvkWeZfrVJ5ey5Hud84eZ1zh/fO+C6bk6Hajtvz8GpeFOYGAjBv6vCM7/nAnImD0Ld7mW2ZvM1dwxj7LGNsE2MsyRibZNl3G2NsB2NsK2NslqwOIo2ufSYUb3xd8KiWVyWR5Momo1AR3PcJG8erGb8mefObABN3RWuHi2eF6nHypiODHMdObzTKNnmntAaCZuyurVtTiGzynhO5WswjEN9OxEPJ72SojQA+A+BP5o2MsXEArgcwHsApAJYyxk7nnEdvA8hj9PtJVcAWF+k2+WDvongyTzR5AapjLjN3TQDtuDTXGIdlTIayKafSKRP1rfY+E1Vha30YqNjkObiznUqR9iBDKH2+VYQFh/PCKHlrruGcb+GcbxXsmgvgCc55K+d8N4AdACb7aatQqaprwSW/Xo59x5q8h1B61IZkTPz5a7Z9eGHDIcz7yyrpflV2Vjfgst+8iWMNasnQ2hNJzHngbeO7XZ6VOQ+8jWabNVwdc9tYvsuEpnTmp26uydhmZ5O374+VRgchr0rWZCjLhRGZDs1F/K4MFaSCko8OVUBNMemIjtfBAPabvh/QtmXBGJvPGFvDGFtTXd3xlqHzy3PrD2LPsSb8/f09rqNrmGGuSZeXzswMkG89vg7v7DjqXNCBP6/YhV1HG/Ha
5iNK5Q/XtuBog3yGpPmynWhqx9YjqVm7bl65fzrnDOF2eQilfX2qIZRu+MuXJknXp/30macYjf32c2fi+ZsvUO4foLbGa5AqRaAKSlacfPYVf+qmaZE4Xp2INISSMbaUMbZR8G+u3WGCbcLrxTlfxDmfxDmf1K9fP9V+Fwz6j5vk7qNrdFu82TRRXqKeJ92Jk0Je+i1mhIBKzpdn/HHEak8uL3Gvw8wYNyDVpqVRmePV8acyHWYX2+7G9HvFuAHSds/SlvVjAD5zzhBjmT8ZVtu2imNbkELfwK2o8mqu8cqkEWopmHUCMcmr1BGiJu84ijnnV3io9wCAoabvQwAc8lBPwaOPe87dT4bStSDzTRSkLV017a1X9Ik4Mieivlk/v2z7MReWT+/X/rrok0yjcuvoE5trVNpXw0kYq0ZrWMu5vXv8rAzFWLBCPmtilrQruTXrRJ0rJ6xR/AKA6xljZYyxUwGMBvBBSG11aPRBkeRc6VXZjK4B6wO+PZEU5o33SrF1AVALfiMPRKmSzWStN+ogkLLtye4FSLoJS9sym7w0dw3L+KverhpOaQVUq7M+vNR+U3kZN6dRxJjU7OSFMMMQ/RC1Td7X+zhj7BoAvwfQD8BLjLH1nPNZnPNNjLHFADYDiAO4mSJrsqlracedL24GADyycg8eWbkHAHBKz3Icqm2xOTJFWsgDmw7VYs4D7wTav+JYtg4wYsFLxuckz14IWsSqXcfw+UXv47XvTM/Isugk5PWtRxtaM9qVYX0ozPvLKpSVFKGLCxOW+c3KLMxKimVCXlKP9tcsRFUGMgfQp1spjjtkZ5Rdsz7dUpObBvfu4twYnGe8yggid00sxtBo4xy3b18UtmrPyH7d3Lfj+ghvdeRzdM2znPMhnPMyzvkAzvks0767OeejOOdjOOev+O9q4XFYIsjvvHoC/vKlScJ9ZnSbfIJzVNbIHwqf0p1xNlx8erY/xGnAqk5MevnjSgDASouztkiQDz+jfpevNtbidS1xV8sYMpbWBrlle9fStD70rctH44EvnK2VUzebqDrXXvn2RVj8tWm2ZWTX7OLT++GheefgO1ecrtRWdnSN8zGZ0TWZuHEgWvPmyPjbf56nVM6pOqdrGiV5OxmK8IfsZ+1WVowrNAegHWmbvL2o+bSCkL/gtJOztjkNeK8ZBHUcNXmX1cv6I746Yk1Q/01EmmL/HqlZi9efNxRjtDcSN2YTVZv8gJPKMdlhjVa7xGizJwzKSMlg254XxyucQ0dVUPVzXCqIGFPSji3167NOc23VUTLXhNg+Cfk8RHU5NMNck+S2g9O6MLiIUoGT1WnAuxXC1uK6Jqdqk3dqPwh3hGoSMf2SyqNMmFE2vS04ZC85bgVY9qIhzpgfMH4EpizpmwpulyUMtCH3lQRQh3dIyEfA8cY22wRgqvf+kbqUiSbJgZomuQ1XZTCVFmfbrZ2ErHn/sYZWtMYTONbQajjTapva0dyWyHoVPVLXgngiieNan2UhlI5C3jJ4pJq8izGmC2e7QxiYyXavHkMZ5Cu57MHodr1Up8lQImyvjYvmnVZAs++D6E2M2X731o5/OrTjlfDGOXe+jvNG9MY913xCuF9VGBw4kVoZKck5fvT0x9JyKpp8meD13tlck/587l1LcfnY/lhWUYUZ4wbgz1+ahDN/8RoG9+pixJ4DwLYj9Zh534rMehxCKFUJInzU0NAFVembUj+P/cNAlKAsyHFsFcanD+iObUcaUOLgCT+lZ3nG9+w4eZW27RzOwdvkVbGrLuw5H3ZwAONOOcm2DC3kXYCs3nNCus+tguMkjFU0pjLBxCGnAW8VqssqqgAAr5tmsJqX6OMcGbnhjXqkGrg7c02rJBxPVI3o3BhjhiS2a5vBGoWT4pVvX4RBmhA1Yvsz6pdW6RrrNXvsq1NxrKENZYI3MjPP3ZI5AzYrLNVS76fPPAUvbJBPcbEe7+Ycw16MmzFgw+0zUVnXjFN6paONch1oyTnHrPED
8fp3pmOGRcEx+kSafGEiu0/dvnI7mTXMmnyMiQWceOKTk5B1rzmLDpHb5B3qsnyXmcBE8fKiNlOO1/S1srP5ixy0J3crRS8tP7tuTshIaxDgSLYmHu3bvcwxnS2QvYC4kybfrSz10Bjcq4vxwLY3ZakT8lw7AEDPriXo2VVt3VwRQS4wNlpxkfagIZt8hARlV3WaABXLEPLiukU5RJzMH+kZqW5Ggno7bqN3WtvFmrxI+MtMRIaGbt6WFun6BmGopVnC6dWHpaF5XhvVIoaz4uQFR1ixi+Zy8yBze59n9EGhbVntOY+uyW1zWZCQzyGVtc2oOFxnfF++tUpYzu1N+OZW+8RuxQpCvjUuEIQOd+fKnUfR0p5QtoXLSnkNobTGwK/eK176sEVgxhEJSZO1xhYGJrTdZ74FcG2bO1R/e7/hq+n27M01rhczcVHWl5AXmds81xYuUTteScjnkGm/fAOz70+nyr13iShLs/sf/IPd9uu6FsVYOnGVpO6JQ3plbXPKaHnLY+vws+c2Oj4MzG2K7ePBaPKy4qKp86K3nxhjaaHH086yq84clFU2HYUjDidM59tR+zGN+izdmixJqOXGx3zJmPREN6tjNjutQeZ3fZLcRaP7KrWVq+gaFYIwjwWRd0Yppp8cr50LPxqOiOJYDA/fMAnn3rXUYiNODepN/z0L3cqKsfkXs9Ce4DipvBiMMTy19gCeWXfQtu5tR+pdCWNRSdnhQdpDrYgW0+peVpwR/z785G7Ys3COsD+ZieVSn83XVpZUTYZo9q/etiilg5vZwI/8l3wph+xUw5n1njeiD/YsnIPFq/fjidWp7OG20TWMYc/COZhyz1IcqbOfbayf87cuOw0PvLHD6TQsiN/EgiaYLJTOlZDjtZMRtJAviqUHlFl5ijGGBOdGe+ap+4C6rd2VkBeGJgajybtBpMl3Kys2adQ20TWSNxM/v5ruN1EV3kFdG6e0BnqYo/k3kr29uCUdZhrM/Z4dJ++fXC2QFuY7DZlr8pCg32KLYjFjAJgHdcwYZOLjlO5vxlzFpwuXk5McHqqQF/S5e1mx8mgTTYbKEP7aeSpr8lo51SyiQQkfp7QGeo468+agfha9aS9KjZ8Zr25MI4Gs8RoxJOTzkKCTFRXHWDqkz/QEcWxH8f4WCRzzDNy/vbtHqZ7G1jj+vGKXoc3mepnZ7uXFwuganYyIG1EiM0H4paoA038X1ezIQa0bYO3fKot/R38DNLfGeTC26nQfAqsqA9mlz8vcNZSgrHMRvCbP0L2sGIN7dcHd10wwtv/6uokY2qeLMG8NoD6QRSaGBYIZuJxz25mkv1pSgbtf3oJXNx02yqsimrHrlh/MGmOKf5eXY5DMjDX9bgN7lqN/jzL8bM44pbadFlABUksTThzSEwBwx6fU6nVs1+Fm0x8CVg3/c5OGomtpET41UZz8zs0zSCXtxi2XnpbxXRhCqd6kErrPyi8q44jMNZ0M6wDRc4R7JcYYiotieHfBZbjKNCjnnjUYb//wMukgU73BRWaV2uZ2cZ027dS3pBan1nOMuxEUuvDzwp6Fc7Bn4RycN6KP+2gYiX26vKQIH/zkCttsoj++cqzx2SntMgDceNFIvHDLhQCAmeMHZjiFveIkXw1N3myuAceoft2x+RezMbRPV+FxKouB6Nda5W3n+7PGZHwX5pNn1u/+Rafo3j61r31eemvabgqhJLKIW0I/oor/VZWxIsEktfML73jNlGTRGt3Y5LuUBhNDkI6uEfkOsgW62XThehKb6Wd263gNCidBaDheTeeu8rMoCXntb3BvrsGOFAbxGAjDV0Tmmk5Ge9zfhJSgUL2ZReGIbgSe3kyR4eRzL+S7lQazgLksXj2jDEtb380y2e3PZD4/XdCF6Wz2Qkxkk1c4TjS5TtqGF8er6yO8IVJKnLOjWusIsEMeoBDKHJBMctzxwibl8u1ZUtOflPf6kFC5ORPJJC79zZtZ29+xrAKVLi+Prklr8voO
lV6m6BKUkLdxvGas8iS4qG6vs1mA6EstCrJL5AV+V+kSIcrU6QfVelQfKowxV/M60vszCxTqQt6EiYM1zfj7+3uVy5+pzT59aN65uOvqCVk37/zpI121r69o5Bb91rz6rFMwe/xAYZmNB+vQbJMb34pdVAjLMtcoV+uYfdHMucN748mbpmHswB5Y+BlxumcRj391KuZPH4neXUvQt3sp5k8fiX/eOMXYrxKa970Zp+PJm1LL0P3nBaca29OrZAW3sLUfbr1iNL51+Wjje8ZPEbDMijGGBZ8ci/GWdLz3XjcRv9eWWbTiJ63BrVeMxucmDZH0BeiqKQxM0o552yk9y3HzpaNs2xPVYQ6ACBsS8jnAzQo4M8YNMAb87AkDMW/q8Kyb90ezx2YfKIExH/Y+7e7sVlaMB794jrc6LNUJNXnDpp36ng6hVJcmThkNe5QVY9rI1BKH351xOs4b0QdLbp2O6ycPk3Q2e9OYgT3w4yvPSJlrGMOPrzwDYwa6yyz4zctH4zwtVUH3smIj3YTTUoi55qyhvfDdGel1Yq2O1yBIv8EBN108Ci9966KM/Z+bNFRpfWId1fu8V9dS3HvdmcJ9xUUx/NL04HeK8PrEkJ74wSz78Siq4YtThjv2MyhIyOcAN44l0aIP1nvXTc4PP2/C+s3JWHB5RkSrQKVt8pnmGjdCXjd3yEhybji07RZRicr/kWWqipisxGXmzwH30cu9JRK+gUxcMvtYmCwazOxLUeh7xL8pCfk8QySs/CQvCmJhhiCTJ4k1+RTW6Bo3Y9ZJUCQ50K4ZvItt1H5RaKQTQcgW61tM1FivZhgzP4NOaxCE05qDG/1hYI4LzgSdgiQMSMjnADev4MUKmrwbfGnyerrcgO7ju1/egsa2uKCd1N9HVu4B4FWTdxLyHL21xSP0hTBE6NX071EuLWNloLYalJfrpJ9hsUKcfC6xy2nT2+e8Dae2VBAtkKI6W9iOJE/7RdoSSeE92LeH6fyVFHlyvBY8bnxpJUJNXp3vzzzdMS3s4q9Nw1Oa888OY+EL7fujX5ZnM1Rld3X28n9WLdFtWoNnv3G+o9+Dc+B/PncWfvmZT2DsQPl6m8VFMdz/+bOwWOH66Dz+1am47/NnorzEe4RPzKNN/umvT8MT86d6bhcAHpp3Dr4weRhuv2ocxg7sgf+YOhznDO+VUUYXdt1Ki/Dr6yY61rn0uxfjrqvVnIturTULP/MJPCBwyPp5QF5xRiqtdiLJ0dCSVkREVf71hvOMz9YH1CP/dZ5xjD4pKurnNgn5HKBy8+mRDGJNXn0U3HD+CMebavKpfTBJkqfcTNomn2p/umJOcTvqWrJnwlq1JbeToc4e1ttRk+fg6NOtFF+QOVpNXH32YAw2rQnqxCm9uuCas8XRGqoUexTy5w7vg6maQ9krsycMwi8/8wl8+cJTseTW6bjz6gnZGUm1v/OmDjeWOLTjtP7dMW+qmnPRrSZ//eRhOLl7dh/8mLq+epF6xFr/k9Jvedbb7pIx/Q3NXXeqR/1uRkI+B6gMXP0GFa+1qg5jDA2tcdN373XpGrY+CIOwndY0iYR85nfdOevGDqxik89njBmvUat9EpLc8loXAG5z7jvhJzKpxJT7yFyP0+9h94ASZSqNgoIQ8q3xBA6caEJzm3q8di5RGbi6YBNppG4GQYwhU8j7GJVBD0IAONbYlrXN+qbTFk/iSF2L4ShVQcUmn88YTuf8CJPPIgwnvH5fBRW55cdcY07SZ44Ac6rSrudhrvbkhoIQ8lsq63Hhr5bj/V3Hou6KEBUNY3DvlHlgZL/uWfvcCFkG5jp2W8YQrU8jThYnofLC0Ybs1YKsms7vlm3HlHuW4ZuPr1Oud9jJ9kmj8lXGjxuU+q1GakmvTh+Y/fvnE2GkBfbieBUJUD/mmhKZkHcwtojebq3KkbmGkf3s79MwKIi0BroWJ4rBzgdUhPynJg7CqL7dMG1Utn1Vv6HvvW6iMZHmxW9eiN1HG1FV
34o7X9ycLsuAe6+diNnjB6aEpI9BOXvCQDz21SnGJKIgaGzNjq4JYgLQpyYOQs8uJWAAvvTXD3zXlyvu+NR4XHvOEEwa0QdP3jRNmE1z6XcvRo/yaIeqLkC9vtU99tUpSCaBeQ+vytpnrnPJrRehqS2BfoLomTe/f0mGWcWKH03ePD8lbgrTcapSf+i9+f1LsvxponTUz3z9fBysafbcTy8UhJDXNYF8mRJuRcVUwBjD+aeJHZv6IDhnWG8jzemEwT0xYXBP7DvWlCXku5UV44ozUilu/ShejDGcP8q/s9WMyAQTxLOZMYaLT++XMUA7AuUlRYYT/DyJM/y0/tFr99a5DG6xu4/MddpFPo0wpfgVjamgNHnzPepUpd53c9/Smrw+5yJNr66lSo7rICkIc43+BM3X8a2iqdq9Buu7ROPLus06CDvAXA0kkzywSUBB2XeJTHShGsbV9fLgEKUy9uV4LRI7Xp1iY0Rdz1r6kRyv/ikyzDXOUr7icF3G962H65Xb2XakXkkY1Ta3Y1d1A/YcbcTmQ3VYvee44zF2kSt2+6zx4cZ9FXngljoVh+uxbn9NIHWFmZe7M2PVToPEy3O5TaDR+cngaVYOzFlgHR2vduOWHK/BoRpj/Nqmw5h9/9t4fv1BAMCLHx3CrPtXYMnGw45tbDpUi5n3rcAf3tzhWHbmfW/hsv95C5f85k1c+cDbuOflCsdjVDR50Q1nPU7XinTNZO6Zgx3b9sJ0y+o3fjhY04xr/7gysPoAYMJg+Ws/4Z6zh/XK+BskXh4cvU0mD92EGdTbYML0tDDXOFYQ0CDyoV06JjWxSg9ciFrdKiibvJPjde+xJgDAhv21mHvWYFRUprT47UfqMXuCOJWuTmVNCwDgw301jv05UpcdQeKE7VPf2JV9fjLzTElRDOtvn4HuZeH8xH+ady6+86/1WLJJ/oAc1LMclbUtobRvx4bbZ6KxLY7zF76R87YLlUvG9Mean14hTCfgFX24ipLyOdGvRxk+/NkMJJIcf357Fxat2BVYSoi4IE5+xQ8uRf+TytBueYP4tCBL5vzpI3HduUMMB6tTtzbcMdNnj+0pDE2+SE2TL9fyROv5z90oEGGngrXri50mb7euZa+upbbJuPzQpbQIpzuEak451XlWbRj07FoSyMLeRCZBCnggrXl7vUf7dCtFvx5lpsAL72PTbN6MC8w1PcqLUV5ShB7lJY51McZwsulaOZlOe3ZxrtMPBTESVAVwFy23SIsm5N08+MOekWgr5AVeep18zoKX6ygCM/l8XYgUuuZd4tNZrj8jfJlrTIfGzeYaH0n6VJaSzAUFIeT19LzxRBLPrz+IytpmPPLubqzdeyKjXHlJqtyz6w5iR1U9NhyoAQC8vPEwqupaDNv82r0nsKu6wTiuPZHEvzccAgAcb2zDI+/uxiPv7saSjZVY+EoFjjW0gnOOFzYcUlrAWITt9GiPx0XNSSFrKHa4WaiFiIa4QupnFfTFxoMy17QLbPJe/Abmxd6jpCBs8rom//yGQ1hnsZnvWTjH+GzW9K/47Qrj85bKOky+ZxkA4O0fXmo4AfVjF63YhafWHgAAbDpUh02H0nHpAPDermP49uWn4VuPr8PXL7FfCszMZWP7442KKgAO06NtbpZcy7JPThiIzZV1zgUBzPnEIDywbLundj577hA8qV1znbuunoCfPrfR+GuH9brcculpnvpBhIe+Lq8oKZ8bZo4fiAfe2GHMDbnp4lF46K2djsf17V6Kow2pNBscKbPJrPEDcPVZg/H0hwfQr0cZbrp4FO58cbOxJKDOhMEnoVupvfgc2ic1U/zGi071cFbBUVBCXpT8ykxcIcZK5LytrLWfobazqgEnGlNtV9rMZps0vDfWaG8X37hkFH44eyxGLHgJgJMmLzcV5Tpk8I/zzrXd/4+vTDFmNZrTK7zxvYtx2f+8BQA4b0RvrN5zAo/dOAXTRp2MU2972Sj3hcnDcNfVE1AUY/j1Z8/M
ql/PbDhv6nDj2okwh8SZH/REfrBn4RzMvC91P4jSa7thwuCeGb/xgk+OxYJPOi+RueanMzD57qWoqm8F55kOUHN9X7kwW0i/+M2LsrZZ6dmlJC/uPV9XlzH2a8ZYBWPsI8bYs4yxXqZ9tzHGdjDGtjLGZvnuqQ16CKXTRBiVOPow19isbU4/hKyvlvY2+dRfFcdr1JRKHJ7m36aLpgG1JpJZD6lEMhnIhKZ8NmMRKfSxVlIc3W+Vzi8TdaBjePi1yb8OYALnfCKAbQBuAwDG2DgA1wMYD2A2gD8wxryvqOCALhScMhGqZDU0C2IdlUkNKkuZ1ZjqtjqJVDRy0Y2Yb8LMKqD17+Z+dtNefZtas7OGBjVrOd+uC5GNLuSd1ucNk3yZsBQmvsw1nPPXTF/fB3Cd9nkugCc4560AdjPGdgCYDOA9P+3J0B0vFYLZqwdONOHptQdx8Zh++PWrWx3r0h2sADD57qUYfnJXYb1mGlrj+O7iDQBSTl0ZtSZzkptkakZ0TR7Y5J2wxjwXxxgSSZ7xxqHbYkVLAQaVf4jSG+Q/RnSNT5t8EETtHA2TIG3yXwbwL+3zYKSEvs4BbVsWjLH5AOYDwLBhzqv2iLCLpJh53wo0tSVw39JtSnXp64wCQFV9K6rq3U9skrH4pmm49Yl1YIzhyxek7HyL/uNcPLF6v+1x91wzAXe9tAWjB2QnqopSY/3ilGF4o+IIhvXpiuJYDIdrWzBmYA98adpwYxZiaVEMrfFkRj+/fflo7KxuxMxxKUfZpWP6YfnWagDAN7UVslS4auIgw9lmhWR8fnLvdRPx3s5USnB9ZmmUD+T7Pn8W7l+6Df17BDsHQIXbPjnW08RJtzgKecbYUgCi6aA/4Zw/r5X5CYA4gH/qhwnKC5+VnPNFABYBwKRJkwJ/njZFvJDInImD8NJHlQBSy4G9+YNLM/bPHD8QM8fbz7Y9e1hvPP3184X7orRKDDipXOiA+sXc9NqeeuSEWcgPP7kbnr/5AuP7r66diMn3LENJEcMoQT59Gf/7/86R7qMcNvnJ5yYNxecmDQWQ1uSjNNdMG3Uypo1SX883SL52sXoknh8chTzn/Aq7/YyxGwBcBeBynl794QCAoaZiQwAcsh7bGSgLacapTr7bnlWWM9TjpEkwdy50m3yEMr5T4De6ZjaAHwH4NOe8ybTrBQDXM8bKGGOnAhgNoOOs5BAgYU/K6ShC3i6ySdf2i/L8XIhgyQfHa2fAr03+fwGUAXhd08Le55zfxDnfxBhbDGAzUmacmznn+bkAa8joqRTCIt9tz/qKRnb+VP1B1T3i1Y+I3FJeUgSgnZzkIeM3ukY6jZBzfjeAu/3U74ZbLj0NL39ciamjTsbrm4/gaENqgsOs8QOwYttRXDi6L97eXo2bLh6FmqZ2VNe3or41jpnjBuD59Qcxa/xAvFFRhXiCY0ifLqisacG2I/UY1b87Tiovwdvbq3HGoJMw4KQyvLm1GheN7of500fiu4vX4+TuZdh7rBEMwB++eC6eXXcASzYexsndy/CD2WMwdlAPjBsUTupbxhi+NG043txajfnTR4bShh/+/KVJeHbdQQzt0wW/+eyZRvpVM93LivGj2WMxc7zYieqVn39qHM6LKEkakeLZb5yPLZXi6LR/3jgFSzYdDj1BV2eHWRdRjpJJkybxNWvWRN0NgiCIDgVjbC3nfJJoHxnDCIIgChgS8gRBEAUMCXmCIIgChoQ8QRBEAUNCniAIooAhIU8QBFHAkJAnCIIoYEjIEwRBFDB5NRmKMVYNYK+PKvoCOBpQd/KRQj8/gM6xUKBzzC3DOef9RDvySsj7hTG2RjbrqxAo9PMD6BwLBTrH/IHMNQRBEAUMCXmCIIgCptCE/KKoOxAyhX5+AJ1joUDnmCcUlE2eIAiCyKTQNHmCIAjCBAl5giCIAqYghDxjbDZjbCtjbAdjbEHU/fEKY2woY2w5Y2wLY2wTY+zb2vY+jLHXGWPbtb+9Tcfc
pp33VsbYrOh6rw5jrIgxto4x9qL2vaDODwAYY70YY08xxiq033NaIZ0nY+w72j26kTH2OGOsvBDOjzH2V8ZYFWNso2mb6/NijJ3LGPtY2/cAi3KVes55h/4HoAjATgAjAZQC2ABgXNT98ngugwCco33uAWAbgHEA7gWwQNu+AMCvtM/jtPMtA3Cqdh2Koj4PhfP8LoDHALyofS+o89P6/iiAG7XPpQB6Fcp5AhgMYDeALtr3xQD+sxDOD8B0AOcA2Gja5vq8AHwAYBoABuAVAJ+M6pwKQZOfDGAH53wX57wNwBMA5kbcJ09wzis55x9qn+sBbEFqQM1FSmhA+3u19nkugCc4562c890AdiB1PfIWxtgQAHMA/MW0uWDODwAYYychJSweBgDOeRvnvAaFdZ7FALowxooBdAVwCAVwfpzzFQCOWza7Oi/G2CAAJ3HO3+Mpif9/pmNyTiEI+cEA9pu+H9C2dWgYYyMAnA1gFYABnPNKIPUgANBfK9YRz/1+AD8EkDRtK6TzA1JvldUA/qaZpf7CGOuGAjlPzvlBAL8BsA9AJYBazvlrKJDzE+D2vAZrn63bI6EQhLzI1tWh40IZY90BPA3gVs55nV1Rwba8PXfG2FUAqjjna1UPEWzL2/MzUYzUK/8fOednA2hE6jVfRoc6T80mPRcpE8UpALoxxubZHSLYlrfn5wLZeeXV+RaCkD8AYKjp+xCkXh07JIyxEqQE/D85589om49or4DQ/lZp2zvauV8A4NOMsT1ImdUuY4z9A4VzfjoHABzgnK/Svj+FlNAvlPO8AsBuznk157wdwDMAzkfhnJ8Vt+d1QPts3R4JhSDkVwMYzRg7lTFWCuB6AC9E3CdPaB74hwFs4Zz/1rTrBQA3aJ9vAPC8afv1jLEyxtipAEYj5fDJSzjnt3HOh3DORyD1O73BOZ+HAjk/Hc75YQD7GWNjtE2XA9iMwjnPfQCmMsa6avfs5Uj5jwrl/Ky4Oi/NpFPPGJuqXZ8vmY7JPVF7s4P4B+BKpCJRdgL4SdT98XEeFyL1WvcRgPXavysBnAxgGYDt2t8+pmN+op33VkTowfdwrpcgHV1TiOd3FoA12m/5HIDehXSeAP4bQAWAjQD+jlSESYc/PwCPI+VnaEdKI/+Kl/MCMEm7NjsB/C+07AJR/KO0BgRBEAVMIZhrCIIgCAkk5AmCIAoYEvIEQRAFDAl5giCIAoaEPEEQRAFDQp4gCKKAISFPEARRwPx/qLRultpzsroAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "def play_episode(env, agent, max_episode_steps=None, mode=None, render=False):\n",
    "    \"\"\"Run one episode of agent-environment interaction.\n",
    "\n",
    "    The environment and then the agent are reset, after which the agent is\n",
    "    stepped with each (observation, reward, done) triple. The agent also\n",
    "    sees the terminal transition (done=True) before the loop ends, unless\n",
    "    the episode is cut short by max_episode_steps.\n",
    "\n",
    "    Args:\n",
    "        env: environment whose reset() returns an observation and whose\n",
    "            step(action) returns (observation, reward, done, extra).\n",
    "        agent: object providing reset(mode=...), step(...) and close().\n",
    "        max_episode_steps: optional cap on environment steps; a falsy\n",
    "            value (None or 0) disables the cap.\n",
    "        mode: passed through unchanged to agent.reset.\n",
    "        render: if true, env.render() is called after every agent step.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (episode_reward, elapsed_steps): the summed rewards and the\n",
    "        number of env.step calls made.\n",
    "    \"\"\"\n",
    "    observation = env.reset()\n",
    "    reward = 0.\n",
    "    done = False\n",
    "    agent.reset(mode=mode)\n",
    "    total_reward = 0.\n",
    "    step_count = 0\n",
    "    truncated = False\n",
    "    while not truncated:\n",
    "        # The agent is stepped before the done check so that it also\n",
    "        # receives the final transition of the episode.\n",
    "        action = agent.step(observation, reward, done)\n",
    "        if render:\n",
    "            env.render()\n",
    "        if done:\n",
    "            break\n",
    "        observation, reward, done, _ = env.step(action)\n",
    "        total_reward += reward\n",
    "        step_count += 1\n",
    "        truncated = bool(max_episode_steps) and step_count >= max_episode_steps\n",
    "    agent.close()\n",
    "    return total_reward, step_count\n",
    "\n",
    "\n",
    "# Train until the mean reward over the 3 most recent episodes exceeds 16.\n",
    "# There is no episode cap: the loop only ends once that threshold is met.\n",
    "logging.info('==== train ====')\n",
    "episode_rewards = []\n",
    "for episode in itertools.count():\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent, mode='train')\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('train episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "    # During the first two episodes the slice covers fewer than 3 rewards.\n",
    "    if np.mean(episode_rewards[-3:]) > 16.:\n",
    "        break\n",
    "# Learning curve, labelled so the figure is readable on its own.\n",
    "plt.plot(episode_rewards)\n",
    "plt.xlabel('training episode')\n",
    "plt.ylabel('episode reward')\n",
    "plt.title('QR-DQN on Pong: training episode rewards')\n",
    "\n",
    "\n",
    "# Evaluate the agent for 100 episodes. play_episode is called without a\n",
    "# mode, so agent.reset receives mode=None for these runs.\n",
    "logging.info('==== test ====')\n",
    "# Rebinds episode_rewards, so the training rewards are no longer held here.\n",
    "episode_rewards = []\n",
    "for episode in range(100):\n",
    "    episode_reward, elapsed_steps = play_episode(env, agent)\n",
    "    episode_rewards.append(episode_reward)\n",
    "    logging.debug('test episode %d: reward = %.2f, steps = %d',\n",
    "            episode, episode_reward, elapsed_steps)\n",
    "# Summarise the test rewards as mean and np.std (default arguments).\n",
    "logging.info('average episode reward = %.2f ± %.2f',\n",
    "        np.mean(episode_rewards), np.std(episode_rewards))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
